From 98b8e04b8247d2e8f014edd3bf387882613c0b77 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Sun, 19 Jan 2025 17:45:50 +0100 Subject: [PATCH] [CPU] Enable and address google-* clang-tidy remarks except google-explicit-constructor --- .../snippets/src/utils/debug_caps_config.cpp | 2 +- src/plugins/intel_cpu/src/.clang-tidy | 10 +- src/plugins/intel_cpu/src/compiled_model.cpp | 46 +- src/plugins/intel_cpu/src/config.cpp | 31 +- src/plugins/intel_cpu/src/cpu_memory.cpp | 28 +- src/plugins/intel_cpu/src/cpu_tensor.cpp | 3 +- .../intel_cpu/src/dnnl_extension_utils.cpp | 9 +- .../intel_cpu/src/dnnl_postops_composer.cpp | 126 ++-- .../src/dnnl_postops_composer_legacy.cpp | 51 +- src/plugins/intel_cpu/src/edge.cpp | 111 ++-- .../emitters/plugin/aarch64/jit_emitter.cpp | 48 +- .../aarch64/jit_load_store_emitters.cpp | 6 +- .../emitters/plugin/x64/jit_bf16_emitters.hpp | 3 +- .../plugin/x64/jit_conversion_emitters.cpp | 88 +-- .../emitters/plugin/x64/jit_dnnl_emitters.cpp | 9 +- .../plugin/x64/jit_dnnl_ext_emitters.hpp | 7 +- .../plugin/x64/jit_eltwise_emitters.cpp | 90 +-- .../src/emitters/plugin/x64/jit_emitter.cpp | 72 ++- .../plugin/x64/jit_load_store_emitters.cpp | 158 +++-- .../src/emitters/plugin/x64/utils.cpp | 21 +- .../snippets/aarch64/cpu_generator.cpp | 15 +- .../snippets/aarch64/jit_fill_emitter.cpp | 11 +- .../snippets/aarch64/jit_fill_emitter.hpp | 2 +- .../snippets/aarch64/jit_kernel_emitter.cpp | 25 +- .../snippets/aarch64/jit_loop_emitters.cpp | 6 +- .../snippets/cpu_runtime_configurator.cpp | 3 +- .../snippets/utils/debug_caps_config.cpp | 2 +- .../emitters/snippets/x64/cpu_generator.cpp | 16 +- .../snippets/x64/jit_binary_call_emitter.cpp | 6 +- .../x64/jit_brgemm_copy_b_emitter.cpp | 6 +- .../snippets/x64/jit_brgemm_emitter.cpp | 16 +- .../snippets/x64/jit_fill_emitter.cpp | 12 +- .../snippets/x64/jit_fill_emitter.hpp | 2 +- .../snippets/x64/jit_horizon_emitter.cpp | 3 +- .../snippets/x64/jit_kernel_emitter.cpp | 28 +- 
.../snippets/x64/jit_loop_emitters.cpp | 3 +- .../snippets/x64/jit_memory_emitters.cpp | 12 +- .../snippets/x64/jit_snippets_emitters.cpp | 3 +- .../snippets/x64/kernel_executors/brgemm.cpp | 3 +- .../x64/kernel_executors/brgemm_amx.cpp | 3 +- .../x64/kernel_executors/brgemm_base.cpp | 9 +- .../x64/kernel_executors/brgemm_copy_b.cpp | 11 +- .../src/emitters/snippets/x64/utils.cpp | 15 +- .../emitters/tpp/x64/jit_equation_emitter.cpp | 6 +- .../src/emitters/tpp/x64/jit_tpp_emitter.cpp | 12 +- src/plugins/intel_cpu/src/emitters/utils.cpp | 6 +- src/plugins/intel_cpu/src/graph.cpp | 156 +++-- src/plugins/intel_cpu/src/graph_context.cpp | 3 +- src/plugins/intel_cpu/src/graph_dumper.cpp | 15 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 541 ++++++++++++------ src/plugins/intel_cpu/src/hash_builder.hpp | 3 +- src/plugins/intel_cpu/src/infer_request.cpp | 27 +- src/plugins/intel_cpu/src/memory_control.cpp | 14 +- .../src/memory_desc/blocked_memory_desc.cpp | 11 +- .../memory_desc/cpu_blocked_memory_desc.cpp | 15 +- .../memory_desc/dnnl_blocked_memory_desc.cpp | 66 ++- .../src/memory_desc/dnnl_memory_desc.cpp | 6 +- src/plugins/intel_cpu/src/node.cpp | 247 +++++--- .../intel_cpu/src/nodes/adaptive_pooling.cpp | 21 +- .../intel_cpu/src/nodes/batch_to_space.cpp | 15 +- src/plugins/intel_cpu/src/nodes/bin_conv.cpp | 118 ++-- src/plugins/intel_cpu/src/nodes/broadcast.cpp | 24 +- src/plugins/intel_cpu/src/nodes/bucketize.cpp | 24 +- .../src/nodes/causal_mask_preprocess.cpp | 16 +- src/plugins/intel_cpu/src/nodes/col2im.cpp | 3 +- .../intel_cpu/src/nodes/color_convert.cpp | 47 +- .../src/nodes/common/cpu_convert.cpp | 51 +- .../src/nodes/common/permute_kernel.cpp | 6 +- .../intel_cpu/src/nodes/common/softmax.cpp | 16 +- .../src/nodes/common/tile_broadcast_utils.cpp | 10 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 54 +- src/plugins/intel_cpu/src/nodes/conv.cpp | 99 ++-- src/plugins/intel_cpu/src/nodes/convert.cpp | 21 +- .../src/nodes/ctc_greedy_decoder.cpp | 24 +- 
.../src/nodes/ctc_greedy_decoder_seq_len.cpp | 27 +- src/plugins/intel_cpu/src/nodes/ctc_loss.cpp | 29 +- src/plugins/intel_cpu/src/nodes/cum_sum.cpp | 39 +- src/plugins/intel_cpu/src/nodes/deconv.cpp | 96 ++-- src/plugins/intel_cpu/src/nodes/def_conv.cpp | 83 +-- .../intel_cpu/src/nodes/depth_to_space.cpp | 48 +- .../intel_cpu/src/nodes/detection_output.cpp | 70 ++- src/plugins/intel_cpu/src/nodes/dft.cpp | 22 +- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 252 +++++--- .../intel_cpu/src/nodes/embedding_bag.cpp | 9 +- .../src/nodes/embedding_bag_offsets.cpp | 35 +- .../src/nodes/embedding_bag_packed.cpp | 24 +- .../src/nodes/embedding_segments_sum.cpp | 33 +- .../src/nodes/executors/acl/acl_deconv.cpp | 3 +- .../src/nodes/executors/acl/acl_eltwise.cpp | 57 +- .../acl/acl_fullyconnected_utils.cpp | 3 +- .../src/nodes/executors/acl/acl_mvn.cpp | 3 +- .../src/nodes/executors/acl/acl_pooling.cpp | 9 +- .../src/nodes/executors/acl/acl_reduce.cpp | 3 +- .../src/nodes/executors/acl/acl_utils.hpp | 12 +- .../nodes/executors/common/common_utils.hpp | 15 +- .../nodes/executors/common/ref_transpose.cpp | 11 +- .../dnnl/dnnl_convolution_primitive.cpp | 3 +- .../executors/dnnl/dnnl_fullyconnected.hpp | 12 +- .../dnnl/dnnl_fullyconnected_primitive.cpp | 51 +- .../executors/dnnl/dnnl_matmul_primitive.cpp | 12 +- .../executors/executor_implementation.hpp | 3 +- .../fullyconnected_implementations.cpp | 12 +- .../src/nodes/executors/interpolate.cpp | 20 +- .../src/nodes/executors/mlas/mlas_gemm.cpp | 3 +- .../nodes/executors/mlas/mlas_transpose.cpp | 8 +- .../nodes/executors/precision_translation.cpp | 6 +- .../nodes/executors/precision_translation.hpp | 6 +- .../intel_cpu/src/nodes/executors/shl/shl.hpp | 10 +- .../src/nodes/executors/shl/shl_eltwise.cpp | 3 +- .../executors/shl/shl_fullyconnected.cpp | 3 +- .../src/nodes/executors/subgraph.cpp | 32 +- .../src/nodes/executors/subgraph.hpp | 15 +- .../src/nodes/executors/transpose.cpp | 6 +- 
.../src/nodes/executors/x64/jit_transpose.cpp | 3 +- .../src/nodes/executors/x64/subgraph.cpp | 13 +- .../src/nodes/executors/x64/subgraph.hpp | 3 +- ...xperimental_detectron_detection_output.cpp | 12 +- ...ectron_generate_proposals_single_image.cpp | 18 +- ...erimental_detectron_priorgridgenerator.cpp | 6 +- ...rimental_detectron_roifeatureextractor.cpp | 6 +- .../nodes/experimental_detectron_topkrois.cpp | 12 +- .../src/nodes/extract_image_patches.cpp | 39 +- src/plugins/intel_cpu/src/nodes/eye.cpp | 30 +- .../intel_cpu/src/nodes/fake_quantize.cpp | 324 +++++++---- .../intel_cpu/src/nodes/fullyconnected.cpp | 18 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 99 ++-- .../intel_cpu/src/nodes/gather_elements.cpp | 24 +- src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 45 +- .../intel_cpu/src/nodes/gather_tree.cpp | 55 +- .../src/nodes/generate_proposals.cpp | 18 +- .../intel_cpu/src/nodes/grid_sample.cpp | 27 +- src/plugins/intel_cpu/src/nodes/grn.cpp | 36 +- src/plugins/intel_cpu/src/nodes/if.cpp | 15 +- src/plugins/intel_cpu/src/nodes/input.cpp | 58 +- .../intel_cpu/src/nodes/interaction.cpp | 9 +- .../intel_cpu/src/nodes/interpolate.cpp | 218 ++++--- .../nodes/kernels/aarch64/brgemm_kernel.cpp | 6 +- .../aarch64/jit_uni_eltwise_generic.cpp | 48 +- .../src/nodes/kernels/acl/gemm_kernel.cpp | 13 +- .../nodes/kernels/scaled_attn/attn_memcpy.cpp | 6 +- .../nodes/kernels/scaled_attn/attn_quant.cpp | 13 +- .../kernels/scaled_attn/attn_quant_kernel.hpp | 4 +- .../nodes/kernels/scaled_attn/executor_pa.cpp | 38 +- .../scaled_attn/executor_pa_common.cpp | 11 +- .../kernels/scaled_attn/mha_single_token.cpp | 6 +- .../kernels/scaled_attn/softmax_kernel.hpp | 18 +- .../src/nodes/kernels/x64/brgemm_kernel.cpp | 23 +- .../nodes/kernels/x64/gather_uni_kernel.cpp | 78 ++- .../src/nodes/kernels/x64/grid_sample.cpp | 67 ++- .../src/nodes/kernels/x64/jit_kernel.cpp | 35 +- .../src/nodes/kernels/x64/jit_kernel.hpp | 18 +- .../src/nodes/kernels/x64/jit_kernel_base.cpp | 51 +- 
.../src/nodes/kernels/x64/mlp_kernel.cpp | 3 +- .../src/nodes/kernels/x64/mlp_kernel.hpp | 26 +- .../src/nodes/kernels/x64/mlp_utils.cpp | 6 +- .../nodes/kernels/x64/non_max_suppression.cpp | 6 +- .../src/nodes/kernels/x64/rdft_kernel.cpp | 3 +- .../src/nodes/kernels/x64/rms_kernel.cpp | 3 +- .../src/nodes/kernels/x64/rope_kernel.cpp | 3 +- src/plugins/intel_cpu/src/nodes/llm_mlp.cpp | 21 +- .../intel_cpu/src/nodes/log_softmax.cpp | 45 +- src/plugins/intel_cpu/src/nodes/lrn.cpp | 24 +- .../intel_cpu/src/nodes/mathematics.cpp | 15 +- src/plugins/intel_cpu/src/nodes/matmul.cpp | 50 +- .../intel_cpu/src/nodes/matrix_nms.cpp | 61 +- src/plugins/intel_cpu/src/nodes/memory.cpp | 12 +- src/plugins/intel_cpu/src/nodes/mha.cpp | 42 +- .../intel_cpu/src/nodes/multiclass_nms.cpp | 83 ++- src/plugins/intel_cpu/src/nodes/mvn.cpp | 199 ++++--- src/plugins/intel_cpu/src/nodes/ngram.cpp | 6 +- .../src/nodes/non_max_suppression.cpp | 27 +- src/plugins/intel_cpu/src/nodes/non_zero.cpp | 15 +- src/plugins/intel_cpu/src/nodes/normalize.cpp | 90 +-- src/plugins/intel_cpu/src/nodes/one_hot.cpp | 9 +- src/plugins/intel_cpu/src/nodes/pad.cpp | 92 ++- .../intel_cpu/src/nodes/paged_attn.cpp | 9 +- src/plugins/intel_cpu/src/nodes/pooling.cpp | 60 +- src/plugins/intel_cpu/src/nodes/priorbox.cpp | 14 +- .../src/nodes/priorbox_clustered.cpp | 12 +- src/plugins/intel_cpu/src/nodes/proposal.cpp | 6 +- .../intel_cpu/src/nodes/proposal_imp.cpp | 12 +- .../intel_cpu/src/nodes/psroi_pooling.cpp | 53 +- .../intel_cpu/src/nodes/psroi_pooling.h | 4 +- src/plugins/intel_cpu/src/nodes/qkv_proj.cpp | 14 +- .../intel_cpu/src/nodes/random_uniform.cpp | 18 +- src/plugins/intel_cpu/src/nodes/range.cpp | 33 +- src/plugins/intel_cpu/src/nodes/rdft.cpp | 46 +- src/plugins/intel_cpu/src/nodes/reduce.cpp | 266 ++++++--- src/plugins/intel_cpu/src/nodes/reference.cpp | 3 +- .../intel_cpu/src/nodes/region_yolo.cpp | 24 +- src/plugins/intel_cpu/src/nodes/reorder.cpp | 69 ++- .../intel_cpu/src/nodes/reorg_yolo.cpp | 9 
+- src/plugins/intel_cpu/src/nodes/reshape.cpp | 18 +- .../intel_cpu/src/nodes/reverse_sequence.cpp | 59 +- src/plugins/intel_cpu/src/nodes/rms_norm.cpp | 9 +- src/plugins/intel_cpu/src/nodes/rnn.cpp | 115 ++-- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 61 +- .../intel_cpu/src/nodes/roi_align_rotated.cpp | 3 +- .../intel_cpu/src/nodes/roi_pooling.cpp | 61 +- src/plugins/intel_cpu/src/nodes/roll.cpp | 39 +- src/plugins/intel_cpu/src/nodes/rope.cpp | 26 +- .../intel_cpu/src/nodes/scaled_attn.cpp | 89 ++- .../intel_cpu/src/nodes/scatter_update.cpp | 45 +- .../intel_cpu/src/nodes/search_sorted.cpp | 3 +- src/plugins/intel_cpu/src/nodes/shapeof.cpp | 15 +- .../intel_cpu/src/nodes/shuffle_channels.cpp | 41 +- src/plugins/intel_cpu/src/nodes/softmax.cpp | 27 +- .../intel_cpu/src/nodes/space_to_batch.cpp | 15 +- .../intel_cpu/src/nodes/space_to_depth.cpp | 48 +- src/plugins/intel_cpu/src/nodes/split.cpp | 30 +- src/plugins/intel_cpu/src/nodes/stft.cpp | 3 +- .../intel_cpu/src/nodes/strided_slice.cpp | 139 +++-- .../src/nodes/string_tensor_pack.cpp | 3 +- .../src/nodes/string_tensor_unpack.cpp | 3 +- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 63 +- .../intel_cpu/src/nodes/tensoriterator.cpp | 80 ++- src/plugins/intel_cpu/src/nodes/tile.cpp | 33 +- src/plugins/intel_cpu/src/nodes/topk.cpp | 90 ++- src/plugins/intel_cpu/src/nodes/transpose.cpp | 24 +- src/plugins/intel_cpu/src/nodes/unique.cpp | 3 +- src/plugins/intel_cpu/src/onednn/dnnl.cpp | 2 +- .../intel_cpu/src/onednn/iml_type_mapper.cpp | 3 +- src/plugins/intel_cpu/src/plugin.cpp | 49 +- .../src/shape_inference/custom/eltwise.cpp | 6 +- .../src/shape_inference/custom/one_hot.cpp | 3 +- .../shape_inference/custom/scaled_attn.cpp | 9 +- .../src/shape_inference/static_dimension.cpp | 6 +- .../src/shape_inference/static_shape.cpp | 12 +- .../cpu_opset/common/op/sdpa.cpp | 9 +- .../cpu_opset/common/op/submodel.cpp | 6 +- .../common/pass/align_matmul_input_ranks.cpp | 23 +- .../pass/causal_mask_preprocess_fusion.cpp 
| 27 +- .../pass/convert_broadcast_to_tiles.cpp | 3 +- .../pass/convert_fq_rnn_to_quantized_rnn.cpp | 9 +- .../common/pass/convert_matmul_to_fc.cpp | 3 +- .../common/pass/convert_tile_to_seq_tiles.cpp | 6 +- .../common/pass/convert_to_power_static.cpp | 18 +- .../pass/move_fc_reshape_to_weights.cpp | 15 +- .../cpu_opset/common/pass/ngram_fusion.cpp | 6 +- .../pass/rnn_sequences_optimization.cpp | 9 +- .../common/pass/stateful_sdpa_fusion.cpp | 19 +- .../common/pass/swap_convert_transpose.cpp | 3 +- .../cpu_opset/x64/op/llm_mlp.cpp | 6 +- .../x64/pass/convert_to_interaction.cpp | 3 +- .../cpu_opset/x64/pass/mha_fusion.cpp | 141 +++-- .../cpu_opset/x64/pass/mlp_fusion.cpp | 27 +- .../cpu_opset/x64/pass/qkv_proj_fusion.cpp | 36 +- .../aarch64/pass/snippets_mark_skipped.cpp | 42 +- .../snippets/aarch64/shape_inference.cpp | 3 +- .../snippets/x64/op/brgemm_utils.cpp | 20 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 11 +- .../x64/pass/eliminate_brgemm_copy_b.cpp | 3 +- .../snippets/x64/pass/enforce_precision.cpp | 6 +- .../adjust_brgemm_copy_b_loop_ports.cpp | 12 +- .../brgemm_copy_b_loop_ports_adjuster.cpp | 9 +- .../x64/pass/lowered/brgemm_cpu_blocking.cpp | 12 +- .../lowered/fuse_load_store_and_convert.cpp | 21 +- .../x64/pass/snippets_mark_skipped.cpp | 106 ++-- .../snippets/x64/shape_inference.cpp | 3 +- .../transformations/tpp/x64/op/eltwise.cpp | 14 +- .../transformations/tpp/x64/op/equation.cpp | 3 +- .../transformations/tpp/x64/op/factory.cpp | 7 +- .../tpp/x64/pass/brgemm_to_brgemm_tpp.cpp | 7 +- .../tpp/x64/pass/eltwise_to_eltwise_tpp.cpp | 3 +- .../tpp/x64/pass/fuse_tpp_to_equations.cpp | 9 +- .../x64/pass/lowered/brgemm_tpp_blocking.cpp | 3 +- .../x64/pass/lowered/set_tpp_leading_dim.cpp | 9 +- .../tpp/x64/pass/scalar_to_scalar_tpp.cpp | 3 +- .../transformation_pipeline.cpp | 76 ++- .../intel_cpu/src/transformations/utils.cpp | 15 +- src/plugins/intel_cpu/src/utils/bfloat16.hpp | 2 +- src/plugins/intel_cpu/src/utils/blob_dump.cpp | 82 ++- 
src/plugins/intel_cpu/src/utils/cpu_utils.hpp | 21 +- .../intel_cpu/src/utils/ngraph_utils.hpp | 3 +- .../intel_cpu/src/utils/plain_tensor.hpp | 52 +- .../intel_cpu/src/utils/precision_support.cpp | 12 +- src/plugins/intel_cpu/src/weights_cache.cpp | 12 +- 277 files changed, 5938 insertions(+), 3159 deletions(-) diff --git a/src/common/snippets/src/utils/debug_caps_config.cpp b/src/common/snippets/src/utils/debug_caps_config.cpp index b80795b12912cd..4f29b109b5fb98 100644 --- a/src/common/snippets/src/utils/debug_caps_config.cpp +++ b/src/common/snippets/src/utils/debug_caps_config.cpp @@ -14,7 +14,7 @@ void DebugCapsConfig::readProperties() { if (env && *env) return env; - return (const char*)nullptr; + return static_cast(nullptr); }; const char* envVarValue = nullptr; diff --git a/src/plugins/intel_cpu/src/.clang-tidy b/src/plugins/intel_cpu/src/.clang-tidy index b86cc0e063da84..c2c40baacdb90f 100644 --- a/src/plugins/intel_cpu/src/.clang-tidy +++ b/src/plugins/intel_cpu/src/.clang-tidy @@ -6,7 +6,6 @@ ### Scopes to be enabled: # # cppcoreguidelines-*, -# google-*, # readability-*, # modernize-*, # bugprone-*, @@ -26,7 +25,9 @@ # -bugprone-fold-init-type # -bugprone-implicit-widening-of-multiplication-result # -cppcoreguidelines-narrowing-conversions -# -google-readability-braces-around-statements +# -google-default-arguments, +# -google-explicit-constructor, +# -google-readability-casting, # -readability-implicit-bool-conversion, # -readability-magic-numbers, cppcoreguidelines-avoid-magic-numbers # -readability-function-cognitive-complexity. 
Reasonable way to enforce splitting complex code into simple functions @@ -35,6 +36,7 @@ Checks: > -*, performance-*, + google-*, modernize-pass-by-value, cppcoreguidelines-prefer-member-initializer, -bugprone-easily-swappable-parameters, @@ -44,9 +46,11 @@ Checks: > -cppcoreguidelines-narrowing-conversions, -cppcoreguidelines-pro-bounds-pointer-arithmetic, -google-build-using-namespace, + -google-default-arguments, + -google-explicit-constructor, + -google-readability-casting, -google-readability-todo, -readability-braces-around-statements, - -google-readability-braces-around-statements, -modernize-use-trailing-return-type, -readability-identifier-length, -readability-implicit-bool-conversion, diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index c9b7f45222d155..e4d1662f96a308 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -57,8 +57,9 @@ CompiledModel::CompiledModel(const std::shared_ptr& model, m_sub_memory_manager(std::move(sub_memory_manager)) { m_mutex = std::make_shared(); const auto& core = m_plugin->get_core(); - if (!core) + if (!core) { OPENVINO_THROW("Unable to get API version. 
Core is unavailable"); + } IStreamsExecutor::Config executor_config; if (m_cfg.exclusiveAsyncRequests) { @@ -81,10 +82,12 @@ CompiledModel::CompiledModel(const std::shared_ptr& model, m_callback_executor = m_task_executor; } - if (m_task_executor) + if (m_task_executor) { set_task_executor(m_task_executor); - if (m_callback_executor) + } + if (m_callback_executor) { set_callback_executor(m_callback_executor); + } int streams = std::max(1, executor_config.get_streams()); std::vector tasks; @@ -208,15 +211,17 @@ std::shared_ptr CompiledModel::create_infer_request() co } std::shared_ptr CompiledModel::get_runtime_model() const { - if (m_graphs.empty()) + if (m_graphs.empty()) { OPENVINO_THROW("No graph was found"); + } return get_graph()._graph.dump(); } ov::Any CompiledModel::get_property(const std::string& name) const { - if (m_graphs.empty()) + if (m_graphs.empty()) { OPENVINO_THROW("No graph was found"); + } if (name == ov::loaded_from_cache) { return m_loaded_from_cache; @@ -275,7 +280,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { return decltype(ov::model_name)::value_type(modelName); } else if (name == ov::optimal_number_of_infer_requests) { const auto streams = config.streamExecutorConfig.get_streams(); - return decltype(ov::optimal_number_of_infer_requests)::value_type( + return static_cast( streams > 0 ? 
streams : 1); // ov::optimal_number_of_infer_requests has no negative values } else if (name == ov::num_streams) { const auto streams = config.streamExecutorConfig.get_streams(); @@ -283,22 +288,22 @@ ov::Any CompiledModel::get_property(const std::string& name) const { streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) } else if (name == ov::inference_num_threads) { const auto num_threads = config.streamExecutorConfig.get_threads(); - return decltype(ov::inference_num_threads)::value_type(num_threads); + return static_cast(num_threads); } else if (name == ov::enable_profiling.name()) { const bool perfCount = config.collectPerfCounters; - return decltype(ov::enable_profiling)::value_type(perfCount); + return static_cast(perfCount); } else if (name == ov::hint::inference_precision) { return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision); } else if (name == ov::hint::performance_mode) { - return decltype(ov::hint::performance_mode)::value_type(config.hintPerfMode); + return static_cast(config.hintPerfMode); } else if (name == ov::log::level) { - return decltype(ov::log::level)::value_type(config.logLevel); + return static_cast(config.logLevel); } else if (name == ov::hint::enable_cpu_pinning.name()) { const bool use_pin = config.enableCpuPinning; - return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin); + return static_cast(use_pin); } else if (name == ov::hint::enable_cpu_reservation.name()) { const bool use_reserve = config.enableCpuReservation; - return decltype(ov::hint::enable_cpu_reservation)::value_type(use_reserve); + return static_cast(use_reserve); } else if (name == ov::hint::scheduling_core_type) { const auto stream_mode = config.schedulingCoreType; return stream_mode; @@ -307,21 +312,22 @@ ov::Any CompiledModel::get_property(const std::string& name) const { return distribution_policy; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool use_ht = 
config.enableHyperThreading; - return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht); + return static_cast(use_ht); } else if (name == ov::hint::execution_mode) { return config.executionMode; } else if (name == ov::hint::num_requests) { - return decltype(ov::hint::num_requests)::value_type(config.hintNumRequests); + return static_cast(config.hintNumRequests); } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()}; } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(config.denormalsOptMode == - Config::DenormalsOptMode::DO_On); + return static_cast( + config.denormalsOptMode == Config::DenormalsOptMode::DO_On); } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( + return static_cast( config.fcSparseWeiDecompressionRate); } else if (name == ov::hint::dynamic_quantization_group_size) { - return decltype(ov::hint::dynamic_quantization_group_size)::value_type(config.fcDynamicQuantizationGroupSize); + return static_cast( + config.fcDynamicQuantizationGroupSize); } else if (name == ov::hint::kv_cache_precision) { return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision); } else if (name == ov::key_cache_precision) { @@ -329,9 +335,9 @@ ov::Any CompiledModel::get_property(const std::string& name) const { } else if (name == ov::value_cache_precision) { return decltype(ov::value_cache_precision)::value_type(config.valueCachePrecision); } else if (name == ov::key_cache_group_size) { - return decltype(ov::key_cache_group_size)::value_type(config.keyCacheGroupSize); + return static_cast(config.keyCacheGroupSize); } else if (name == ov::value_cache_group_size) { - return decltype(ov::value_cache_group_size)::value_type(config.valueCacheGroupSize); + return static_cast(config.valueCacheGroupSize); 
} OPENVINO_THROW("Unsupported property: ", name); } diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index db53bb0c531b1a..8ec7b0b3b8d18f 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -94,8 +94,9 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { try { ov::Any value = val.as(); int val_i = value.as(); - if (val_i < 0) + if (val_i < 0) { OPENVINO_THROW("invalid value."); + } hintNumRequests = static_cast(val_i); } catch (const ov::Exception&) { OPENVINO_THROW("Wrong value ", @@ -278,14 +279,15 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { } else if (key == ov::intel_cpu::snippets_mode.name()) { try { auto const mode = val.as(); - if (mode == ov::intel_cpu::SnippetsMode::ENABLE) + if (mode == ov::intel_cpu::SnippetsMode::ENABLE) { snippetsMode = SnippetsMode::Enable; - else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) + } else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK) { snippetsMode = SnippetsMode::IgnoreCallback; - else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) + } else if (mode == ov::intel_cpu::SnippetsMode::DISABLE) { snippetsMode = SnippetsMode::Disable; - else + } else { OPENVINO_THROW("invalid value"); + } } catch (ov::Exception&) { OPENVINO_THROW("Wrong value ", val.as(), @@ -396,8 +398,9 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { inferencePrecision = ov::element::f16; } #endif - if (mayiuse(avx512_core_bf16)) + if (mayiuse(avx512_core_bf16)) { inferencePrecision = ov::element::bf16; + } } else { inferencePrecision = ov::element::undefined; } @@ -431,8 +434,9 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { } } - if (!prop.empty()) + if (!prop.empty()) { _config.clear(); + } if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams streams = 1; @@ -453,17 +457,20 @@ 
void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { } void Config::updateProperties() { - if (!_config.empty()) + if (!_config.empty()) { return; + } - if (collectPerfCounters == true) + if (collectPerfCounters == true) { _config.insert({ov::enable_profiling.name(), "YES"}); - else + } else { _config.insert({ov::enable_profiling.name(), "NO"}); - if (exclusiveAsyncRequests == true) + } + if (exclusiveAsyncRequests == true) { _config.insert({ov::internal::exclusive_async_requests.name(), "YES"}); - else + } else { _config.insert({ov::internal::exclusive_async_requests.name(), "NO"}); + } _config.insert({ov::device::id.name(), device_id}); diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 5e749121ecda51..82c7bc4507a4c8 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -135,8 +135,9 @@ void Memory::load(const IMemory& src, bool ftz) const { void Memory::nullify() { void* dataPtr = getData(); - if (dataPtr != nullptr) + if (dataPtr != nullptr) { memset(dataPtr, 0, getDesc().getCurrentMemSize()); + } } void Memory::redefineDesc(MemoryDescPtr desc) { @@ -194,8 +195,9 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { void* Memory::getData() const { void* data = getDataNoThrow(); - if (data == nullptr && m_pMemDesc->getShape().isStatic() && m_pMemDesc->getShape().getElementsCount() != 0) + if (data == nullptr && m_pMemDesc->getShape().isStatic() && m_pMemDesc->getShape().getElementsCount() != 0) { OPENVINO_THROW("Memory has not been allocated"); + } return data; } @@ -492,8 +494,9 @@ dnnl::memory StaticMemory::getPrimitive() const { void StaticMemory::nullify() { void* dataPtr = getData(); - if (dataPtr != nullptr) + if (dataPtr != nullptr) { memset(dataPtr, 0, getSize()); + } } StaticMemory::StaticMemoryBlock::StaticMemoryBlock(size_t size) : m_size(size) { @@ -539,13 +542,14 @@ void 
StaticMemory::StaticMemoryBlock::unregisterMemory(Memory* memPtr) { # if !defined(__NR_mbind) && defined(__x86_64__) # define __NR_mbind 237 # endif -static long mbind(void* start, - unsigned long len, - int mode, - const unsigned long* nmask, - unsigned long maxnode, - unsigned flags) { - return syscall(__NR_mbind, (long)start, len, mode, (long)nmask, maxnode, flags); +static int64_t mbind(void* start, uint64_t len, int mode, const uint64_t* nmask, uint64_t maxnode, unsigned flags) { + return syscall(__NR_mbind, + reinterpret_cast(start), + len, + mode, + reinterpret_cast(nmask), + maxnode, + flags); } #endif @@ -555,8 +559,8 @@ bool mbind_move(void* data, size_t size, int targetNode) { auto pagesize = getpagesize(); auto page_count = (size + pagesize - 1) / pagesize; char* pages = reinterpret_cast( // NOLINT(performance-no-int-to-ptr) - (((uintptr_t)data) & ~((uintptr_t)(pagesize - 1)))); - unsigned long mask = 0; + ((reinterpret_cast(data)) & ~(static_cast(pagesize - 1)))); + uint64_t mask = 0; unsigned flags = 0; if (realNode < 0) { // restore default policy diff --git a/src/plugins/intel_cpu/src/cpu_tensor.cpp b/src/plugins/intel_cpu/src/cpu_tensor.cpp index 14378592c576e0..548bc2755c5ccc 100644 --- a/src/plugins/intel_cpu/src/cpu_tensor.cpp +++ b/src/plugins/intel_cpu/src/cpu_tensor.cpp @@ -33,8 +33,9 @@ void Tensor::set_shape(ov::Shape new_shape) { vec2str(shape.getStaticDims()), " -> ", new_shape.to_string()); - if (shape.getStaticDims() == new_shape) + if (shape.getStaticDims() == new_shape) { return; + } } auto desc = m_memptr->getDescPtr(); diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index 18258b6b02e4df..64cfbb1ce3d36a 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -186,8 +186,9 @@ size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) "Unexpected non zero offset for a dnnl 
blocked memory desc"); size_t size = desc.get_size(); - if (size == DNNL_RUNTIME_SIZE_VAL) + if (size == DNNL_RUNTIME_SIZE_VAL) { return MemoryDesc::UNDEFINED_SIZE; + } return size; } @@ -207,8 +208,9 @@ DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t auto query = dnnl::convert_to_c(what); const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx); - if (!cdesc) + if (!cdesc) { OPENVINO_THROW("query_md failed for query=", query, " idx=", idx, "."); + } return DnnlExtensionUtils::makeDescriptor(cdesc); } @@ -216,8 +218,9 @@ DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t std::string DnnlExtensionUtils::query_impl_info_str(const const_dnnl_primitive_desc_t& pd) { const char* res; dnnl_status_t status = dnnl_primitive_desc_query(pd, dnnl_query_impl_info_str, 0, &res); - if (status != dnnl_success) + if (status != dnnl_success) { OPENVINO_THROW("query_impl_info_str failed."); + } return std::string(res); } diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 1233ce95f40c23..a328dfa2dfb41f 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -145,8 +145,9 @@ bool DnnlPostOpsComposer::appendAttrPostOps(const ScaleShiftPostOp& postOp, bool } break; case ScaleShiftPostOp::Type::prelu: - if (!allowBinary) + if (!allowBinary) { return false; + } appendBinary(dnnl::algorithm::binary_prelu, scales); break; default: @@ -159,12 +160,14 @@ bool DnnlPostOpsComposer::appendAttrPostOps(const ScaleShiftPostOp& postOp, bool static float roundHalfToEven(float f) { const float RHAFZ = std::round(f); // r is round-half-away-from-zero const float d = RHAFZ - f; // f + d -> RHAFZ - if ((d != 0.5f) && (d != -0.5f)) + if ((d != 0.5f) && (d != -0.5f)) { return RHAFZ; + } // already even +/-1.5 -> +/-2 - if (std::fmod(RHAFZ, 2.0f) == 0.0f) + if (std::fmod(RHAFZ, 2.0f) == 0.0f) { 
return RHAFZ; + } // +/-2.5 -> +/-3, but we need it to to +/-2 // RHAFZ (f+d) goes the wrong way, should be (f-d) @@ -181,8 +184,9 @@ struct OptimizedFormula { void shrinkLength() { auto _do_shrink = [](std::vector& v) { - if (v.size() <= 1) + if (v.size() <= 1) { return; + } auto ref = v[0]; if (std::all_of(v.cbegin(), v.cend(), [&](float val) { return val == ref; @@ -244,8 +248,9 @@ static OptimizedFormula updateOptimizedFormula(const FakeQuantizePostOp& postOp, // per-channel FQ. if (isPerTensor(inputShift, inputShift[0], 0.00005f)) { f.ish.resize(OC); - for (auto& v : f.ish) + for (auto& v : f.ish) { v = inputShift[0]; + } } else { f.ish = inputShift; } @@ -255,14 +260,18 @@ static OptimizedFormula updateOptimizedFormula(const FakeQuantizePostOp& postOp, f.osc = outputScale; f.osh = outputShift; - if (f.clo.size() == 1) + if (f.clo.size() == 1) { f.clo.resize(OC, f.clo[0]); - if (f.chi.size() == 1) + } + if (f.chi.size() == 1) { f.chi.resize(OC, f.chi[0]); - if (f.isc.size() == 1) + } + if (f.isc.size() == 1) { f.isc.resize(OC, f.isc[0]); - if (f.ish.size() == 1) + } + if (f.ish.size() == 1) { f.ish.resize(OC, f.ish[0]); + } for (size_t i = 0; i < OC; i++) { auto& clo = f.clo[i]; @@ -274,16 +283,18 @@ static OptimizedFormula updateOptimizedFormula(const FakeQuantizePostOp& postOp, clo = roundHalfToEven(clo * isc + ish); chi = roundHalfToEven(chi * isc + ish); - if (clo > chi) + if (clo > chi) { std::swap(clo, chi); + } if (!do_rounding) { // when no rounding is needed, outputScale/outputShift can be // merged with inputScale/inputShift with updated cropLow/cropHigh clo = clo * osc + osh; chi = chi * osc + osh; - if (clo > chi) + if (clo > chi) { std::swap(clo, chi); + } // crop(x*isc + ish, a, b)*osc + osh // crop(x*isc*osc + ish*osc + osh, a', b') @@ -367,24 +378,30 @@ bool DnnlPostOpsComposer::appendAttrPostOps(const FakeQuantizePostOp& postOp, // return false before committing any change to DnnlPostOpsComposer if (!allowBinary) { - if (f.ish.size() > 1) + if 
(f.ish.size() > 1) { return false; + } if (!skipRoundClipOutputLinear) { - if (f.clo.size() > 1 || f.chi.size() > 1) + if (f.clo.size() > 1 || f.chi.size() > 1) { return false; - if (f.osc.size() > 1 || f.osh.size() > 1) + } + if (f.osc.size() > 1 || f.osh.size() > 1) { return false; + } } } - if (!appendLinear(f.isc, f.ish, isLastPostOp && skipRoundClipOutputLinear, allowBinary)) + if (!appendLinear(f.isc, f.ish, isLastPostOp && skipRoundClipOutputLinear, allowBinary)) { return false; + } - if (skipRoundClipOutputLinear) + if (skipRoundClipOutputLinear) { return true; + } - if (doRounding) + if (doRounding) { appendRoundHTE(); + } appendClip(f.clo, f.chi); appendLinear(f.osc, f.osh, isLastPostOp, allowBinary); @@ -392,8 +409,9 @@ bool DnnlPostOpsComposer::appendAttrPostOps(const FakeQuantizePostOp& postOp, } void DnnlPostOpsComposer::updateWeiScales() { - if (wei_scale_mask == 0 && wei_scale_values[0] == 1.0f) + if (wei_scale_mask == 0 && wei_scale_values[0] == 1.0f) { return; + } DEBUG_LOG("Set weight scales mask ", "DNNL_ARG: ", DNNL_ARG_WEIGHTS, " mask: ", wei_scale_mask); attr.set_scales_mask(DNNL_ARG_WEIGHTS, wei_scale_mask); @@ -406,8 +424,9 @@ void DnnlPostOpsComposer::updateWeiScales() { } void DnnlPostOpsComposer::updateDestScales() { - if (dst_scale_val == 1.0f) + if (dst_scale_val == 1.0f) { return; + } DEBUG_LOG("Set dest scale mask ", "DNNL_ARG: ", DNNL_ARG_DST, " mask: ", 0); attr.set_scales_mask(DNNL_ARG_DST, 0); @@ -474,8 +493,9 @@ bool DnnlPostOpsComposer::appendScale(const std::vector& scale, bool isLa // // we cannot implement all of them, so we just add the one // that we observed in real models. 
- if ((ops.len() == 0)) + if ((ops.len() == 0)) { fuseIntoWeiScale = true; + } // relu(x)*s = relu(x*s) // prelu(x)*s = prelu(x*s) @@ -498,22 +518,26 @@ bool DnnlPostOpsComposer::appendScale(const std::vector& scale, bool isLa } if (fuseIntoWeiScale) { if (scale.size() > 1) { - if (wei_scale_mask == 0) + if (wei_scale_mask == 0) { wei_scale_values.resize(scale.size(), wei_scale_values[0]); - else + } else { OPENVINO_ASSERT(wei_scale_values.size() == OC); + } - for (Dim j = 0; j < OC; j++) + for (Dim j = 0; j < OC; j++) { wei_scale_values[j] *= scale[j]; + } } else { - for (size_t j = 0; j < wei_scale_values.size(); j++) + for (size_t j = 0; j < wei_scale_values.size(); j++) { wei_scale_values[j] *= scale[0]; + } } - if (wei_scale_values.size() == 1) + if (wei_scale_values.size() == 1) { wei_scale_mask = 0; - else + } else { wei_scale_mask = weightScaleMaskPerChannel; + } updateWeiScales(); @@ -525,8 +549,9 @@ bool DnnlPostOpsComposer::appendScale(const std::vector& scale, bool isLa appendEltwise(dnnl::algorithm::eltwise_linear, scale[0], 0); } else { // this check returns before committing any changes - if (!allowBinary) + if (!allowBinary) { return false; + } appendBinary(dnnl::algorithm::binary_mul, scale); } return true; @@ -538,8 +563,9 @@ bool DnnlPostOpsComposer::appendShift(const std::vector& shift, bool allo appendEltwise(dnnl::algorithm::eltwise_linear, 1.0f, shift[0]); } } else { - if (!allowBinary) + if (!allowBinary) { return false; + } appendBinary(dnnl::algorithm::binary_add, shift); } return true; @@ -550,22 +576,26 @@ bool DnnlPostOpsComposer::appendLinear(const std::vector& scale, bool isLastPostOp, bool allowBinary) { if (scale.size() == 1 && shift.size() == 1) { - if (shift[0] == 0.0f) + if (shift[0] == 0.0f) { return appendScale(scale, isLastPostOp, allowBinary); - else + } else { appendEltwise(dnnl::algorithm::eltwise_linear, scale[0], shift[0]); + } } else { // return before committing any changes - if (!allowBinary && shift.size() > 1) + if 
(!allowBinary && shift.size() > 1) { return false; + } if (!scale.empty()) { - if (!appendScale(scale, isLastPostOp && shift.empty(), allowBinary)) + if (!appendScale(scale, isLastPostOp && shift.empty(), allowBinary)) { return false; + } } if (!shift.empty()) { - if (!appendShift(shift, allowBinary)) + if (!appendShift(shift, allowBinary)) { return false; + } } } return true; @@ -577,13 +607,15 @@ void DnnlPostOpsComposer::appendClip(const std::vector& low, const std::v } else if (low.size() == 1) { OPENVINO_ASSERT(high.size() == OC); appendEltwise(dnnl::algorithm::eltwise_clip, low[0], std::numeric_limits::max()); - if (high.size() > 0) + if (high.size() > 0) { appendBinary(dnnl::algorithm::binary_min, high); + } } else if (high.size() == 1) { OPENVINO_ASSERT(low.size() == OC); appendEltwise(dnnl::algorithm::eltwise_clip, -std::numeric_limits::max(), high[0]); - if (low.size() > 0) + if (low.size() > 0) { appendBinary(dnnl::algorithm::binary_max, low); + } } else { if (low.size() > 0) { OPENVINO_ASSERT(low.size() == OC); @@ -605,8 +637,9 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr, shape.push_back(1); } - if (shape.size() != 2 && shape.size() != 3) + if (shape.size() != 2 && shape.size() != 3) { OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape"); + } // weights without batch: (OC, G) // weights with batch: (B, OC, G) @@ -632,8 +665,9 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr, } static dnnl::memory::dims getGroupDims(const VectorDims& weiDims, const VectorDims& scaleDims) { - if (scaleDims[0] == 1 && scaleDims[1] == 1) + if (scaleDims[0] == 1 && scaleDims[1] == 1) { return {}; + } int N = weiDims[weiDims.size() - 2]; int K = weiDims[weiDims.size() - 1]; @@ -649,10 +683,12 @@ static int getMask(const VectorDims& weiDims, const dnnl::memory::dims& groupDim int N = weiDims[weiDims.size() - 2]; int K = weiDims[weiDims.size() - 1]; int mask = 0; - if 
(!groupDims.empty() && groupDims[1] != N) + if (!groupDims.empty() && groupDims[1] != N) { mask += maskN; - if (!groupDims.empty() && groupDims[0] != K) + } + if (!groupDims.empty() && groupDims[0] != K) { mask += maskK; + } return mask; } @@ -661,8 +697,9 @@ void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr bool needTranspose, ov::element::Type dstPrecision, const VectorDims& weiDims) { - if (scales_ptr == nullptr) + if (scales_ptr == nullptr) { return; + } auto scaleMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine); auto groupDims = getGroupDims(weiDims, scaleMem->getStaticDims()); @@ -678,8 +715,9 @@ void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_p bool needTranspose, ov::element::Type dstPrecision, const VectorDims& weiDims) { - if (zero_points_ptr == nullptr) + if (zero_points_ptr == nullptr) { return; + } auto zeroPointsMem = prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine); auto groupDims = getGroupDims(weiDims, zeroPointsMem->getStaticDims()); @@ -693,8 +731,9 @@ void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_p void DnnlPostOpsComposer::appendDecompressionScalesLegacy(const MemoryCPtr& scales_ptr, bool needTranspose, ov::element::Type dstPrecision) { - if (scales_ptr == nullptr) + if (scales_ptr == nullptr) { return; + } auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine); attr.set_scales_dims(DNNL_ARG_WEIGHTS, @@ -708,8 +747,9 @@ void DnnlPostOpsComposer::appendDecompressionScalesLegacy(const MemoryCPtr& scal void DnnlPostOpsComposer::appendDecompressionZeroPointsLegacy(const MemoryCPtr& zero_points_ptr, bool needTranspose, ov::element::Type dstPrecision) { - if (zero_points_ptr == nullptr) + if (zero_points_ptr == nullptr) { return; + } auto zeroPointsMem = prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine); 
attr.set_zero_points_dims(DNNL_ARG_WEIGHTS, diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer_legacy.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer_legacy.cpp index dcafad1e524bae..317b0796faed72 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer_legacy.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer_legacy.cpp @@ -54,8 +54,9 @@ DnnlPostOpsComposerLegacy::DnnlPostOpsComposerLegacy(const dnnl::engine& engine, } void DnnlPostOpsComposerLegacy::updateWeiScales() { - if (wei_scale_mask == 0 && wei_scale_values[0] == 1.0f) + if (wei_scale_mask == 0 && wei_scale_values[0] == 1.0f) { return; + } DEBUG_LOG("Set weight scales mask ", "DNNL_ARG: ", DNNL_ARG_WEIGHTS, " mask: ", wei_scale_mask); attr.set_scales_mask(DNNL_ARG_WEIGHTS, wei_scale_mask); @@ -67,8 +68,9 @@ void DnnlPostOpsComposerLegacy::updateWeiScales() { } void DnnlPostOpsComposerLegacy::updateDestScales() { - if (dst_scale_val == 1.0f) + if (dst_scale_val == 1.0f) { return; + } DEBUG_LOG("Set dest scale mask ", "DNNL_ARG: ", DNNL_ARG_DST, " mask: ", 0); attr.set_scales_mask(DNNL_ARG_DST, 0); @@ -133,8 +135,9 @@ bool DnnlPostOpsComposerLegacy::appendScale(const std::vector& scale, boo // // we cannot implement all of them, so we just add the one // that we observed in real models. 
- if ((ops.len() == 0)) + if ((ops.len() == 0)) { fuseIntoWeiScale = true; + } // relu(x)*s = relu(x*s) // prelu(x)*s = prelu(x*s) @@ -157,22 +160,26 @@ bool DnnlPostOpsComposerLegacy::appendScale(const std::vector& scale, boo } if (fuseIntoWeiScale) { if (scale.size() > 1) { - if (wei_scale_mask == 0) + if (wei_scale_mask == 0) { wei_scale_values.resize(scale.size(), wei_scale_values[0]); - else + } else { OPENVINO_ASSERT(wei_scale_values.size() == OC); + } - for (Dim j = 0; j < OC; j++) + for (Dim j = 0; j < OC; j++) { wei_scale_values[j] *= scale[j]; + } } else { - for (size_t j = 0; j < wei_scale_values.size(); j++) + for (size_t j = 0; j < wei_scale_values.size(); j++) { wei_scale_values[j] *= scale[0]; + } } - if (wei_scale_values.size() == 1) + if (wei_scale_values.size() == 1) { wei_scale_mask = 0; - else + } else { wei_scale_mask = weightScaleMaskPerChannel; + } updateWeiScales(); return true; } @@ -182,8 +189,9 @@ bool DnnlPostOpsComposerLegacy::appendScale(const std::vector& scale, boo appendEltwise(dnnl::algorithm::eltwise_linear, scale[0], 0); } else { // this check returns before committing any changes - if (!allowBinary) + if (!allowBinary) { return false; + } appendBinary(dnnl::algorithm::binary_mul, scale); } return true; @@ -195,8 +203,9 @@ bool DnnlPostOpsComposerLegacy::appendShift(const std::vector& shift, boo appendEltwise(dnnl::algorithm::eltwise_linear, 1.0f, shift[0]); } } else { - if (!allowBinary) + if (!allowBinary) { return false; + } appendBinary(dnnl::algorithm::binary_add, shift); } return true; @@ -207,22 +216,26 @@ bool DnnlPostOpsComposerLegacy::appendLinear(const std::vector& scale, bool isLastPostOp, bool allowBinary) { if (scale.size() == 1 && shift.size() == 1) { - if (shift[0] == 0.0f) + if (shift[0] == 0.0f) { return appendScale(scale, isLastPostOp, allowBinary); - else + } else { appendEltwise(dnnl::algorithm::eltwise_linear, scale[0], shift[0]); + } } else { // return before committing any changes - if (!allowBinary && 
shift.size() > 1) + if (!allowBinary && shift.size() > 1) { return false; + } if (!scale.empty()) { - if (!appendScale(scale, isLastPostOp && shift.empty(), allowBinary)) + if (!appendScale(scale, isLastPostOp && shift.empty(), allowBinary)) { return false; + } } if (!shift.empty()) { - if (!appendShift(shift, allowBinary)) + if (!appendShift(shift, allowBinary)) { return false; + } } } return true; @@ -234,13 +247,15 @@ void DnnlPostOpsComposerLegacy::appendClip(const std::vector& low, const } else if (low.size() == 1) { OPENVINO_ASSERT(high.size() == OC); appendEltwise(dnnl::algorithm::eltwise_clip, low[0], std::numeric_limits::max()); - if (high.size() > 0) + if (high.size() > 0) { appendBinary(dnnl::algorithm::binary_min, high); + } } else if (high.size() == 1) { OPENVINO_ASSERT(low.size() == OC); appendEltwise(dnnl::algorithm::eltwise_clip, -std::numeric_limits::max(), high[0]); - if (low.size() > 0) + if (low.size() > 0) { appendBinary(dnnl::algorithm::binary_max, low); + } } else { if (low.size() > 0) { OPENVINO_ASSERT(low.size() == OC); diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 2ee0a42f4cae3b..f7578f65ce3bbb 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -21,15 +21,17 @@ Edge::Edge(const NodePtr& parent, const NodePtr& child, int pr_port, int ch_port const NodePtr Edge::getParent() const { auto parentPtr = parent.lock(); - if (!parentPtr) + if (!parentPtr) { OPENVINO_THROW("Edge contains empty parent node"); + } return parentPtr; } const NodePtr Edge::getChild() const { auto childPtr = child.lock(); - if (!childPtr) + if (!childPtr) { OPENVINO_THROW("Edge contains empty child node"); + } return childPtr; } @@ -43,16 +45,20 @@ bool Edge::isDropped() const { auto parent_ptr = parent.lock(); if (parent_ptr) { - for (auto& edge : parent_ptr->childEdges) - if (edge.lock().get() == this) + for (auto& edge : parent_ptr->childEdges) { + if (edge.lock().get() == this) { 
not_in_parent = false; + } + } } auto child_ptr = child.lock(); if (child_ptr) { - for (auto& edge : child_ptr->parentEdges) - if (edge.lock().get() == this) + for (auto& edge : child_ptr->parentEdges) { + if (edge.lock().get() == this) { not_in_child = false; + } + } } return not_in_parent && not_in_child; } @@ -82,8 +88,9 @@ void Edge::collectConsumers(std::vector& result) const { } } } else { - if (!add_result_node(result, childNode)) + if (!add_result_node(result, childNode)) { return; + } // collect consumers in case of an upstream in-place memory reference if (auto peerChildSPD = childNode->getSelectedPrimitiveDescriptor()) { @@ -105,8 +112,9 @@ bool Edge::enforceReorder() { auto parentSPD = parentNode->getSelectedPrimitiveDescriptor(); auto childNode = getChild(); auto childSPD = childNode->getSelectedPrimitiveDescriptor(); - if (!parentSPD || !childSPD) + if (!parentSPD || !childSPD) { OPENVINO_THROW("Cannot make a decision about reorder. Primitive descriptors weren't selected."); + } bool in_place = inPlace(); @@ -122,8 +130,9 @@ bool Edge::enforceReorder() { if (portChildEdges.size() > 1) { if (in_place) { for (auto& p_edge_peer : portChildEdges) { - if (p_edge_peer.get() == this) + if (p_edge_peer.get() == this) { continue; + } if (p_edge_peer->inPlace(LOOK_DOWN)) { return true; } @@ -138,15 +147,17 @@ static inline bool isPhycicalMemCompatible(const MemoryDesc& lhsMemDesc, const M if (!lhsMemDesc.isDefined() || !rhsMemDesc.isDefined() || !(lhsMemDesc.getType() & MemoryDescType::Blocked) || !(rhsMemDesc.getType() & MemoryDescType::Blocked) || (lhsMemDesc.getType() == DnnlBlocked && !lhsMemDesc.as()->hasEmptyExtraData()) || - (rhsMemDesc.getType() == DnnlBlocked && !rhsMemDesc.as()->hasEmptyExtraData())) + (rhsMemDesc.getType() == DnnlBlocked && !rhsMemDesc.as()->hasEmptyExtraData())) { return false; + } const auto lhsBlockMemDesc = lhsMemDesc.as(); const auto rhsBlockMemDesc = rhsMemDesc.as(); if (lhsBlockMemDesc->getShape() != 
rhsBlockMemDesc->getShape() || - lhsBlockMemDesc->getPrecision() != rhsBlockMemDesc->getPrecision()) + lhsBlockMemDesc->getPrecision() != rhsBlockMemDesc->getPrecision()) { return false; + } // dims padding check bool isZeroDimsPaddings = std::all_of(lhsBlockMemDesc->getOffsetPaddingToData().begin(), @@ -160,8 +171,9 @@ static inline bool isPhycicalMemCompatible(const MemoryDesc& lhsMemDesc, const M return x == 0; }); bool isSameElementsCount = lhsBlockMemDesc->getPaddedElementsCount() == rhsBlockMemDesc->getPaddedElementsCount(); - if (!isZeroDimsPaddings || !isSameElementsCount) + if (!isZeroDimsPaddings || !isSameElementsCount) { return false; + } // tensor padding check if (lhsBlockMemDesc->getOffsetPadding() != rhsBlockMemDesc->getOffsetPadding()) { @@ -192,12 +204,14 @@ static inline bool isPhycicalMemCompatible(const MemoryDesc& lhsMemDesc, const M bool isDenseTensor = dimsEqualStrong(lhsStridesDefault, lhsBlockMemDesc->getStrides(), lhsSkipAxis) && dimsEqualStrong(rhsStridesDefault, rhsBlockMemDesc->getStrides(), rhsSkipAxis); - if (!isDenseTensor) + if (!isDenseTensor) { return false; + } auto getCleanDim = [&](const VectorDims& dims, const VectorDims& flag) { - if (dims.size() != flag.size()) + if (dims.size() != flag.size()) { return dims; + } std::vector ret; for (size_t i = 0; i < dims.size(); i++) { if (flag[i] != 1) { @@ -210,14 +224,16 @@ static inline bool isPhycicalMemCompatible(const MemoryDesc& lhsMemDesc, const M // block dim check auto lhsBlockDimsClean = getCleanDim(lhsBlockDims, lhsBlockDims); auto rhsBlockDimsClean = getCleanDim(rhsBlockDims, rhsBlockDims); - if (!dimsEqualStrong(lhsBlockDimsClean, rhsBlockDimsClean)) + if (!dimsEqualStrong(lhsBlockDimsClean, rhsBlockDimsClean)) { return false; + } // order check auto lhsOrderClean = getCleanDim(lhsBlockMemDesc->getOrder(), lhsBlockDims); auto rhsOrderClean = getCleanDim(rhsBlockMemDesc->getOrder(), rhsBlockDims); - if (!dimsEqualStrong(lhsOrderClean, rhsOrderClean)) + if 
(!dimsEqualStrong(lhsOrderClean, rhsOrderClean)) { return false; + } return true; } @@ -227,8 +243,9 @@ Edge::ReorderStatus Edge::needReorder() { auto inputPortDesc = getInputPortDesc(); auto outPortDesc = getOutputPortDesc(); - if (inputPortDesc->getMemDesc()->getPrecision() == element::undefined) + if (inputPortDesc->getMemDesc()->getPrecision() == element::undefined) { return ReorderStatus::No; + } // Check whether the child node may accept the parent produced tensor if (!outPortDesc->isCompatible(*inputPortDesc)) { @@ -271,13 +288,15 @@ int Edge::getOutputNum() const { } void Edge::allocateCommon(const std::function& allocate) { - if (memoryPtr) + if (memoryPtr) { OPENVINO_THROW("Unexpected behaviour: status == NeedAllocation but memory is already allocated."); + } auto& inputDesc = getInputDesc(); auto& outputDesc = getOutputDesc(); - if (!inputDesc.isCompatible(outputDesc)) + if (!inputDesc.isCompatible(outputDesc)) { OPENVINO_THROW("Cannot allocate memory for incompatible descriptors."); + } memoryPtr = allocate(inputDesc); DEBUG_LOG(*this, " memoryPtr=", memoryPtr); @@ -317,8 +336,9 @@ std::string Edge::hash() const { } void Edge::externalAllocate(const WeightsSharing::Ptr& weightsCache) { - if (status != Status::NeedAllocation) + if (status != Status::NeedAllocation) { return; + } if (weightsCache) { auto alloc = [this]() { @@ -354,28 +374,34 @@ void Edge::changeStatus(Edge::Status state) { if (Status::Validated == this->status) { OPENVINO_THROW("Unexpected attempt of memory change on edge: ", *this); } - if (this->status != Status::Uninitialized && state == Status::NeedAllocation) + if (this->status != Status::Uninitialized && state == Status::NeedAllocation) { return; - if (this->status == Status::NotAllocated) + } + if (this->status == Status::NotAllocated) { memoryFromEdge.reset(); + } this->status = state; } PortDescBaseCPtr Edge::getInputPortDesc() const { auto parentPtr = getParent(); - if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) + 
if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Primitive descriptor for node ", parentPtr->getName(), " is not selected."); + } int inputIdx = getInputNum(); - if (inputIdx < 0) + if (inputIdx < 0) { OPENVINO_THROW("Edge cannot be found for node", parentPtr->getName(), "."); + } auto& outConfs = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs; - if (outConfs.empty()) + if (outConfs.empty()) { OPENVINO_THROW("Node ", parentPtr->getName(), " has empty output config list."); + } - if (static_cast(inputIdx) >= outConfs.size()) + if (static_cast(inputIdx) >= outConfs.size()) { inputIdx = 0; + } auto inputPortDesc = outConfs[inputIdx].getPortDesc(); if (!inputPortDesc) { @@ -388,19 +414,22 @@ PortDescBaseCPtr Edge::getInputPortDesc() const { PortDescBaseCPtr Edge::getOutputPortDesc() const { auto childPtr = getChild(); - if (childPtr->getSelectedPrimitiveDescriptor() == nullptr) + if (childPtr->getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Primitive descriptor for node ", childPtr->getName(), " is not selected."); + } int outputIdx = getOutputNum(); if (outputIdx < 0) { OPENVINO_THROW("Edge cannot be found for node", childPtr->getName(), "."); } auto& inConfs = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs; - if (inConfs.empty()) + if (inConfs.empty()) { OPENVINO_THROW("Node ", childPtr->getName(), " has empty input config list."); + } - if (static_cast(outputIdx) >= inConfs.size()) + if (static_cast(outputIdx) >= inConfs.size()) { outputIdx = 0; + } auto outPortDesc = inConfs[outputIdx].getPortDesc(); if (!outPortDesc) { @@ -433,11 +462,13 @@ const MemoryDesc& Edge::getOutputDesc() const { } const MemoryDesc& Edge::getDesc() const { - if (getInputDesc().getPrecision() == element::undefined) + if (getInputDesc().getPrecision() == element::undefined) { return getInputDesc(); + } - if (!getInputDesc().isCompatible(getOutputDesc())) + if (!getInputDesc().isCompatible(getOutputDesc())) { 
OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName()); + } return getInputDesc(); } @@ -459,8 +490,9 @@ void Edge::sharedMemFrom(const EdgePtr& edge) { } void Edge::validate() { - if (status == Status::Validated) + if (status == Status::Validated) { return; + } getParent(); getChild(); @@ -484,8 +516,9 @@ EdgePtr Edge::getSharedEdge(std::nothrow_t) const { } void Edge::init() { - if (status != Status::NeedAllocation && status != Status::Uninitialized) + if (status != Status::NeedAllocation && status != Status::Uninitialized) { return; + } DEBUG_LOG(*this); EdgePtr edgePtr = getBaseEdge(); if (edgePtr.get() == this) { @@ -541,16 +574,18 @@ EdgePtr Edge::getBaseEdge(int look) { for (auto edge : edgesForSamePort) { if (edge.get() != this) { // Return once found the first inplace consumer - if (edge->inPlace()) + if (edge->inPlace()) { return edge; + } } } // Return the first output edge as the base if there is no inPlace consumers // thus benefits zero-copy of outputs. 
for (auto edge : edgesForSamePort) { - if (Type::Output == edge->getChild()->getType()) + if (Type::Output == edge->getChild()->getType()) { return edge; + } } return edgesForSamePort[0]; @@ -559,14 +594,16 @@ EdgePtr Edge::getBaseEdge(int look) { bool Edge::inPlace(LOOK look) const { int inputNum = getInputNum(); if (look & LOOK_UP) { - if (getParent()->inPlaceOutPort(inputNum) >= 0) + if (getParent()->inPlaceOutPort(inputNum) >= 0) { return true; + } } int outputNum = getOutputNum(); if (look & LOOK_DOWN) { - if (getChild()->inPlaceInputPort(outputNum) >= 0) + if (getChild()->inPlaceInputPort(outputNum) >= 0) { return true; + } } return false; } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp index 5033f645413557..62e809e02feb12 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp @@ -67,8 +67,9 @@ void jit_emitter::emit_data() const { for (auto it = entry_map_.begin(); it != entry_map_.end(); it++) { const auto& te = (*it).second; // get map entry for a given key const auto len = te.bcast ? 
get_vec_length() : sizeof(table_entry_val_t); - for (size_t d = 0; d < len; d += sizeof(table_entry_val_t)) + for (size_t d = 0; d < len; d += sizeof(table_entry_val_t)) { h->dd(te.val); + } } } @@ -127,33 +128,41 @@ void jit_emitter::emitter_preamble(const std::vector& in_idxs, } for (size_t idx = 0; idx < get_max_vecs_count(); idx++) { - if (aux_vec_idxs.size() >= get_aux_vecs_count()) + if (aux_vec_idxs.size() >= get_aux_vecs_count()) { break; + } if (is_vec_input) { - if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) + if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) { continue; + } } if (is_vec_output) { - if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) + if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) { continue; + } } - if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) + if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) { continue; - if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) + } + if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) { continue; + } - if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) + if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) { continue; - if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) + } + if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) { continue; + } aux_vec_idxs.push_back(idx); preserved_vec_idxs.push_back(idx); } - if (aux_vec_idxs.size() < get_aux_vecs_count()) + if (aux_vec_idxs.size() < get_aux_vecs_count()) { OV_CPU_JIT_EMITTER_THROW("Failed to allocate required number of vector registers"); + } // gpr registers for (auto idx : pool_aux_gpr_idxs) { @@ -164,31 +173,38 @@ void jit_emitter::emitter_preamble(const std::vector& in_idxs, for (size_t gpr_idx = 0; gpr_idx <= end_gpr_idx; 
++gpr_idx) { size_t _idx = end_gpr_idx - gpr_idx; // we allocate from the end - if (aux_gpr_idxs.size() >= get_aux_gprs_count()) + if (aux_gpr_idxs.size() >= get_aux_gprs_count()) { break; + } if ((_idx == Xbyak_aarch64::Operand::X18) || (_idx == Xbyak_aarch64::Operand::X23) || - (_idx == Xbyak_aarch64::Operand::X24) || (_idx == Xbyak_aarch64::Operand::X28)) + (_idx == Xbyak_aarch64::Operand::X24) || (_idx == Xbyak_aarch64::Operand::X28)) { continue; + } if (!is_vec_input) { - if (std::find(in_idxs.begin(), in_idxs.end(), _idx) != in_idxs.end()) + if (std::find(in_idxs.begin(), in_idxs.end(), _idx) != in_idxs.end()) { continue; + } } if (!is_vec_output) { - if (std::find(out_idxs.begin(), out_idxs.end(), _idx) != out_idxs.end()) + if (std::find(out_idxs.begin(), out_idxs.end(), _idx) != out_idxs.end()) { continue; + } } - if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) + if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) { continue; - if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) + } + if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) { continue; + } aux_gpr_idxs.push_back(_idx); preserved_gpr_idxs.push_back(_idx); } - if (aux_gpr_idxs.size() < get_aux_gprs_count()) + if (aux_gpr_idxs.size() < get_aux_gprs_count()) { OV_CPU_JIT_EMITTER_THROW("Failed to allocate required number of general-purpose registers"); + } if (!entry_map_.empty()) { // last aux_gpr_idx is for p_table, we can use aux_gpr_idxs from idx 0 for other purpose diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_load_store_emitters.cpp index 3ca77bdac53baf..a73ca8568fdc00 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_load_store_emitters.cpp +++ 
b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_load_store_emitters.cpp @@ -163,8 +163,9 @@ void jit_load_emitter::emit_isa(const std::vector& in_idxs, const std::v } size_t jit_load_emitter::get_aux_gprs_count() const { - if (load_num_ == 3) + if (load_num_ == 3) { return 1; + } return 0; } @@ -318,8 +319,9 @@ void jit_store_emitter::emit_isa(const std::vector& in_idxs, const std:: } size_t jit_store_emitter::get_aux_gprs_count() const { - if (store_num_ == 3) + if (store_num_ == 3) { return 1; + } return 0; } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp index 78ad3b04aa06b1..a7f8b3fe432c9d 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp @@ -148,8 +148,9 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter { } size_t aux_vecs_count() const override { - if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) { return 0; + } return host_isa_ == dnnl::impl::cpu::x64::avx512_core ? 
2 : 1; } }; diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_conversion_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_conversion_emitters.cpp index e508b428c5506b..dae396910576e2 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_conversion_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_conversion_emitters.cpp @@ -21,8 +21,9 @@ jit_convert_emitter::jit_convert_emitter(jit_generator* host, : jit_emitter(host, host_isa, exec_prc), input_type(node->get_input_element_type(0)), output_type(node->get_output_element_type(0)) { - if (output_type == ov::element::bf16) + if (output_type == ov::element::bf16) { uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(host, host_isa)); + } } void jit_convert_emitter::validate_types() const { @@ -32,10 +33,12 @@ void jit_convert_emitter::validate_types() const { }); }; - if (!is_supported_type(input_type)) + if (!is_supported_type(input_type)) { OV_CPU_JIT_EMITTER_THROW("Unsupported input type: ", input_type.get_type_name()); - if (!is_supported_type(output_type)) + } + if (!is_supported_type(output_type)) { OV_CPU_JIT_EMITTER_THROW("Unsupported output type: ", output_type.get_type_name()); + } } size_t jit_convert_emitter::get_inputs_num() const { @@ -44,8 +47,9 @@ size_t jit_convert_emitter::get_inputs_num() const { void jit_convert_emitter::emit_data() const { jit_emitter::emit_data(); - if (uni_vcvtneps2bf16) + if (uni_vcvtneps2bf16) { uni_vcvtneps2bf16->emit_data(); + } } template @@ -54,8 +58,9 @@ void jit_convert_emitter::float2bfloat(const std::vector& in_vec_idxs, using Vmm = typename conditional3::type; Vmm vmm_src = Vmm(in_vec_idxs[0]); Vmm vmm_dst = Vmm(out_vec_idxs[0]); - if (!uni_vcvtneps2bf16) + if (!uni_vcvtneps2bf16) { OV_CPU_JIT_EMITTER_THROW("Converter from float to bf16 isn't initialized!"); + } uni_vcvtneps2bf16->emit_code({static_cast(vmm_src.getIdx())}, {static_cast(vmm_dst.getIdx())}); } @@ -107,27 +112,32 @@ void 
jit_convert_truncation_emitter::emit_isa(const std::vector& in_vec_ switch (input_type) { case ov::element::f32: - if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) + if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) { h->uni_vcvttps2dq(vmm_dst, vmm_src); + } break; case ov::element::i32: - if (one_of(output_type, ov::element::f32, ov::element::bf16, ov::element::f16)) + if (one_of(output_type, ov::element::f32, ov::element::bf16, ov::element::f16)) { h->uni_vcvtdq2ps(vmm_dst, vmm_src); + } break; case ov::element::bf16: h->vpmovzxwd(vmm_dst, vmm_src); h->uni_vpslld(vmm_dst, vmm_dst, 16); - if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) + if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) { h->uni_vcvttps2dq(vmm_dst, vmm_dst); + } break; case ov::element::f16: - if (isa == dnnl::impl::cpu::x64::avx512_core) + if (isa == dnnl::impl::cpu::x64::avx512_core) { h->vcvtph2ps(vmm_dst, Ymm(vmm_src.getIdx())); - else + } else { h->vcvtph2ps(vmm_dst, Xmm(vmm_src.getIdx())); // for avx2_vnni_2? 
- if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) + } + if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) { h->uni_vcvttps2dq(vmm_dst, vmm_dst); + } break; case ov::element::i8: h->uni_vpmovsxbd(vmm_dst, vmm_src); @@ -159,18 +169,20 @@ void jit_convert_truncation_emitter::emit_isa(const std::vector& in_vec_ break; case ov::element::f16: if (input_type == ov::element::f32) { - if (isa == dnnl::impl::cpu::x64::avx512_core) + if (isa == dnnl::impl::cpu::x64::avx512_core) { h->vcvtps2ph(ymm_dst, vmm_src, 0x4); - else + } else { h->vcvtps2ph(xmm_dst, vmm_src, 0x4); + } } else { if (one_of(input_type, ov::element::i8, ov::element::u8)) { h->uni_vcvtdq2ps(vmm_dst, vmm_dst); } - if (isa == dnnl::impl::cpu::x64::avx512_core) + if (isa == dnnl::impl::cpu::x64::avx512_core) { h->vcvtps2ph(ymm_dst, vmm_dst, 0x4); - else + } else { h->vcvtps2ph(xmm_dst, vmm_dst, 0x4); + } } break; case ov::element::i8: @@ -188,8 +200,9 @@ void jit_convert_truncation_emitter::emit_isa(const std::vector& in_vec_ void jit_convert_truncation_emitter::register_table_entries() { if (host_isa_ == dnnl::impl::cpu::x64::avx2 && one_of(output_type, ov::element::i8, ov::element::u8) && - !is_i8_and_u8_case()) + !is_i8_and_u8_case()) { push_arg_entry_of("mask_byte", 0x000000ff, true); + } } template @@ -207,8 +220,9 @@ void jit_convert_truncation_emitter::dword2int8(const std::vector& in_ve } else if (isa == dnnl::impl::cpu::x64::avx2) { h->vpand(vmm_dst, vmm_src, table_val("mask_byte")); // to avoid saturation h->uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != dnnl::impl::cpu::x64::sse41) + if (isa != dnnl::impl::cpu::x64::sse41) { h->vpermq(ymm_dst, ymm_dst, 0x08); + } h->uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst); } } @@ -250,27 +264,32 @@ void jit_convert_saturation_emitter::emit_isa(const std::vector& in_vec_ switch (input_type) { case ov::element::f32: - if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) + if 
(one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) { h->uni_vcvtps2dq(vmm_dst, vmm_src); + } break; case ov::element::i32: - if (one_of(output_type, ov::element::f32, ov::element::bf16, ov::element::f16)) + if (one_of(output_type, ov::element::f32, ov::element::bf16, ov::element::f16)) { h->uni_vcvtdq2ps(vmm_dst, vmm_src); + } break; case ov::element::bf16: h->vpmovzxwd(vmm_dst, vmm_src); h->uni_vpslld(vmm_dst, vmm_dst, 16); - if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) + if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) { h->uni_vcvttps2dq(vmm_dst, vmm_dst); + } break; case ov::element::f16: - if (isa == dnnl::impl::cpu::x64::avx512_core) + if (isa == dnnl::impl::cpu::x64::avx512_core) { h->vcvtph2ps(vmm_dst, Ymm(vmm_src.getIdx())); - else + } else { h->vcvtph2ps(vmm_dst, Xmm(vmm_src.getIdx())); // for avx2_vnni_2? - if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) + } + if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8)) { h->uni_vcvttps2dq(vmm_dst, vmm_dst); + } break; case ov::element::i8: h->uni_vpmovsxbd(vmm_dst, vmm_src); @@ -302,18 +321,20 @@ void jit_convert_saturation_emitter::emit_isa(const std::vector& in_vec_ break; case ov::element::f16: if (input_type == ov::element::f32) { - if (isa == dnnl::impl::cpu::x64::avx512_core) + if (isa == dnnl::impl::cpu::x64::avx512_core) { h->vcvtps2ph(ymm_dst, vmm_src, 0x4); - else + } else { h->vcvtps2ph(xmm_dst, vmm_src, 0x4); + } } else { if (one_of(input_type, ov::element::i8, ov::element::u8)) { h->uni_vcvtdq2ps(vmm_dst, vmm_dst); } - if (isa == dnnl::impl::cpu::x64::avx512_core) + if (isa == dnnl::impl::cpu::x64::avx512_core) { h->vcvtps2ph(ymm_dst, vmm_dst, 0x4); - else + } else { h->vcvtps2ph(xmm_dst, vmm_dst, 0x4); + } } break; case ov::element::i8: @@ -354,18 +375,21 @@ void jit_convert_saturation_emitter::dword2int8(const std::vector& in_ve h->vpmovusdb(xmm_dst, vmm_dst); } 
} else { - if (is_signed) + if (is_signed) { h->uni_vpackssdw(vmm_dst, vmm_src, vmm_src); - else + } else { h->uni_vpackusdw(vmm_dst, vmm_src, vmm_src); + } - if (isa != dnnl::impl::cpu::x64::sse41) + if (isa != dnnl::impl::cpu::x64::sse41) { h->vpermq(ymm_dst, ymm_dst, 0x08); + } - if (is_signed) + if (is_signed) { h->uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst); - else + } else { h->uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst); + } } } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_emitters.cpp index e8d2f9d0936f14..229055616f5537 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_emitters.cpp @@ -71,16 +71,19 @@ void jit_dnnl_emitter::emit_code(const std::vector& in_vec_idxs, const std::vector& pool_vec_idxs, const std::vector& pool_gpr_idxs) const { if (host_isa_ == cpu::x64::sse41) { - if (out_vec_idxs[0] != in_vec_idxs[0]) + if (out_vec_idxs[0] != in_vec_idxs[0]) { h->uni_vmovups(Xmm(out_vec_idxs[0]), Xmm(in_vec_idxs[0])); + } eltwise_injector_sse42->compute_vector(out_vec_idxs[0]); } else if (host_isa_ == cpu::x64::avx2) { - if (out_vec_idxs[0] != in_vec_idxs[0]) + if (out_vec_idxs[0] != in_vec_idxs[0]) { h->uni_vmovups(Ymm(out_vec_idxs[0]), Ymm(in_vec_idxs[0])); + } eltwise_injector_avx2->compute_vector(out_vec_idxs[0]); } else if (host_isa_ == cpu::x64::avx512_core) { - if (out_vec_idxs[0] != in_vec_idxs[0]) + if (out_vec_idxs[0] != in_vec_idxs[0]) { h->uni_vmovups(Zmm(out_vec_idxs[0]), Zmm(in_vec_idxs[0])); + } eltwise_injector_avx512_core->compute_vector(out_vec_idxs[0]); } else { OV_CPU_JIT_EMITTER_THROW("Unsupported ISA ", host_isa_); diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp index 0b7396b6fcd830..673f16ef3bce2e 100644 --- 
a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp @@ -157,14 +157,15 @@ class jit_gelu_v7_emitter : public jit_dnnl_emitter { : jit_dnnl_emitter(host, host_isa, n, exec_prc) { auto gelu = getNgraphOpAs(n); ov::op::GeluApproximationMode approximationMode = gelu->get_approximation_mode(); - if (approximationMode == ov::op::GeluApproximationMode::ERF) + if (approximationMode == ov::op::GeluApproximationMode::ERF) { kind = dnnl_eltwise_gelu_erf; - else if (approximationMode == ov::op::GeluApproximationMode::TANH) + } else if (approximationMode == ov::op::GeluApproximationMode::TANH) { kind = dnnl_eltwise_gelu_tanh; - else + } else { OPENVINO_THROW_NOT_IMPLEMENTED( "Subgraph node doesn't support ngraph operation Gelu with approximation mode: ", approximationMode); + } set_injector(); } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp index 80143ebb2a3efb..f76d6a2168db6a 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp @@ -159,8 +159,9 @@ void jit_mul_add_emitter::emit_isa(const std::vector& in_vec_idxs, vmm_mul1 = vmm_src1; } - if (vmm_dst.getIdx() != vmm_src2.getIdx()) + if (vmm_dst.getIdx() != vmm_src2.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src2); + } h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1); } break; @@ -508,16 +509,18 @@ void jit_floor_mod_emitter::emit_isa(const std::vector& in_vec_idxs, Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]); if (isa == x64::sse41) { - if (vmm_dst.getIdx() != vmm_src0.getIdx()) + if (vmm_dst.getIdx() != vmm_src0.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } h->uni_vmovups(vmm_aux0, vmm_src0); h->uni_vdivps(vmm_aux0, vmm_aux0, vmm_src1); h->uni_vroundps(vmm_aux0, vmm_aux0, 1); // rounding down h->uni_vmulps(vmm_aux0, 
vmm_aux0, vmm_src1); h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux0); } else { - if (vmm_dst.getIdx() != vmm_src0.getIdx()) + if (vmm_dst.getIdx() != vmm_src0.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } h->uni_vdivps(vmm_aux0, vmm_src0, vmm_src1); h->uni_vroundps(vmm_aux0, vmm_aux0, 1); // rounding down h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1); @@ -567,16 +570,18 @@ void jit_mod_emitter::emit_isa(const std::vector& in_vec_idxs, const std Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]); if (isa == x64::sse41) { - if (vmm_dst.getIdx() != vmm_src0.getIdx()) + if (vmm_dst.getIdx() != vmm_src0.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } h->uni_vmovups(vmm_aux0, vmm_src0); h->uni_vdivps(vmm_aux0, vmm_aux0, vmm_src1); h->uni_vroundps(vmm_aux0, vmm_aux0, 3); // truncate h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1); h->uni_vsubps(vmm_dst, vmm_dst, vmm_aux0); } else { - if (vmm_dst.getIdx() != vmm_src0.getIdx()) + if (vmm_dst.getIdx() != vmm_src0.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } h->uni_vdivps(vmm_aux0, vmm_src0, vmm_src1); h->uni_vroundps(vmm_aux0, vmm_aux0, 3); // truncate h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_src1); @@ -635,8 +640,9 @@ void jit_maximum_emitter::emit_isa(const std::vector& in_vec_idxs, }; if (isa == x64::sse41) { - if (vmm_src0.getIdx() != vmm_dst.getIdx()) + if (vmm_src0.getIdx() != vmm_dst.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } uni_vmax(vmm_dst, vmm_dst, vmm_src1); } else { uni_vmax(vmm_dst, vmm_src0, vmm_src1); @@ -695,8 +701,9 @@ void jit_minimum_emitter::emit_isa(const std::vector& in_vec_idxs, }; if (isa == x64::sse41) { - if (vmm_src0.getIdx() != vmm_dst.getIdx()) + if (vmm_src0.getIdx() != vmm_dst.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } uni_vmin(vmm_dst, vmm_dst, vmm_src1); } else { uni_vmin(vmm_dst, vmm_src0, vmm_src1); @@ -760,8 +767,9 @@ void jit_squared_difference_emitter::emit_isa(const std::vector& in_vec_ }; if (isa == x64::sse41) { - if (vmm_src0.getIdx() != vmm_dst.getIdx()) + if (vmm_src0.getIdx() != 
vmm_dst.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } uni_vsqdiff(vmm_dst, vmm_dst, vmm_src1); } else { uni_vsqdiff(vmm_dst, vmm_src0, vmm_src1); @@ -823,18 +831,20 @@ void jit_power_dynamic_emitter::emit_isa(const std::vector& in_vec_idxs, size_t n_gprs_to_save = sizeof(gprs_to_save) / sizeof(gprs_to_save[0]); h->sub(h->rsp, n_gprs_to_save * gpr_size); - for (size_t i = 0; i < n_gprs_to_save; ++i) + for (size_t i = 0; i < n_gprs_to_save; ++i) { h->mov(h->ptr[h->rsp + i * gpr_size], gprs_to_save[i]); + } // caller obligation to save k-regs as callee may use them size_t n_k_regs_to_save = 8; if (isa == x64::avx512_core) { h->sub(h->rsp, n_k_regs_to_save * k_mask_size); for (size_t i = 0; i < n_k_regs_to_save; ++i) { - if (x64::mayiuse(x64::avx512_core)) + if (x64::mayiuse(x64::avx512_core)) { h->kmovq(h->ptr[h->rsp + i * k_mask_size], Opmask(i)); - else + } else { h->kmovw(h->ptr[h->rsp + i * k_mask_size], Opmask(i)); + } } } @@ -846,8 +856,9 @@ void jit_power_dynamic_emitter::emit_isa(const std::vector& in_vec_idxs, // `vlen` should be replaced with `host_isa::vlen` and // `host_isa::vecs_count`. 
h->sub(h->rsp, (get_max_vecs_count() + 2) * get_vec_length()); - for (size_t i = 2; i < get_max_vecs_count() + 2; ++i) + for (size_t i = 2; i < get_max_vecs_count() + 2; ++i) { h->uni_vmovups(h->ptr[h->rsp + i * get_vec_length()], Vmm(i - 2)); + } h->uni_vmovups(h->ptr[h->rsp + 0 * get_vec_length()], vmm_src0); // src h->uni_vmovups(h->ptr[h->rsp + 1 * get_vec_length()], vmm_src1); // beta @@ -871,25 +882,28 @@ void jit_power_dynamic_emitter::emit_isa(const std::vector& in_vec_idxs, h->add(h->rsp, h->rbx); // restore vector registers - for (size_t i = get_max_vecs_count() + 1; i >= 2; --i) + for (size_t i = get_max_vecs_count() + 1; i >= 2; --i) { h->uni_vmovups(Vmm(i - 2), h->ptr[h->rsp + i * get_vec_length()]); + } h->uni_vmovups(vmm_dst, h->ptr[h->rsp + 0 * get_vec_length()]); h->add(h->rsp, (get_max_vecs_count() + 2) * get_vec_length()); // restore k registers if (isa == x64::avx512_core) { for (int i = n_k_regs_to_save - 1; i >= 0; --i) { - if (x64::mayiuse(x64::avx512_core)) + if (x64::mayiuse(x64::avx512_core)) { h->kmovq(Opmask(i), h->ptr[h->rsp + i * k_mask_size]); - else + } else { h->kmovw(Opmask(i), h->ptr[h->rsp + i * k_mask_size]); + } } h->add(h->rsp, n_k_regs_to_save * k_mask_size); } // restore gpr registers - for (int i = n_gprs_to_save - 1; i >= 0; --i) + for (int i = n_gprs_to_save - 1; i >= 0; --i) { h->mov(gprs_to_save[i], h->ptr[h->rsp + i * gpr_size]); + } h->add(h->rsp, n_gprs_to_save * gpr_size); } @@ -1755,8 +1769,9 @@ void jit_power_static_emitter::emit_isa(const std::vector& in_vec_idxs, } } } else { - if (vmm_dst.getIdx() != vmm_src0.getIdx()) + if (vmm_dst.getIdx() != vmm_src0.getIdx()) { h->uni_vmovups(vmm_dst, vmm_src0); + } } if (power == 1.f) { @@ -1776,10 +1791,12 @@ void jit_power_static_emitter::emit_isa(const std::vector& in_vec_idxs, int64_t ipower = std::abs(static_cast(power)) - 1; h->uni_vmovups(vmm_aux0, vmm_dst); while (ipower > 0) { - if (ipower & 0x1) + if (ipower & 0x1) { h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux0); - 
if (ipower > 1) + } + if (ipower > 1) { h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_aux0); + } ipower = ipower >> 1; } @@ -1802,18 +1819,20 @@ void jit_power_static_emitter::emit_isa(const std::vector& in_vec_idxs, size_t n_gprs_to_save = sizeof(gprs_to_save) / sizeof(gprs_to_save[0]); h->sub(h->rsp, n_gprs_to_save * gpr_size); - for (size_t i = 0; i < n_gprs_to_save; ++i) + for (size_t i = 0; i < n_gprs_to_save; ++i) { h->mov(h->ptr[h->rsp + i * gpr_size], gprs_to_save[i]); + } // caller obligation to save k-regs as callee may use them size_t n_k_regs_to_save = 8; if (isa == x64::avx512_core) { h->sub(h->rsp, n_k_regs_to_save * k_mask_size); for (size_t i = 0; i < n_k_regs_to_save; ++i) { - if (x64::mayiuse(x64::avx512_core)) + if (x64::mayiuse(x64::avx512_core)) { h->kmovq(h->ptr[h->rsp + i * k_mask_size], Opmask(i)); - else + } else { h->kmovw(h->ptr[h->rsp + i * k_mask_size], Opmask(i)); + } } } @@ -1825,8 +1844,9 @@ void jit_power_static_emitter::emit_isa(const std::vector& in_vec_idxs, // `vlen` should be replaced with `host_isa::vlen` and // `host_isa::vecs_count`. 
h->sub(h->rsp, (get_max_vecs_count() + 2) * get_vec_length()); - for (size_t i = 2; i < get_max_vecs_count() + 2; ++i) + for (size_t i = 2; i < get_max_vecs_count() + 2; ++i) { h->uni_vmovups(h->ptr[h->rsp + i * get_vec_length()], Vmm(i - 2)); + } h->uni_vmovups(h->ptr[h->rsp + 0 * get_vec_length()], vmm_dst); // src h->uni_vmovups(h->ptr[h->rsp + 1 * get_vec_length()], vmm_aux0); // beta @@ -1850,25 +1870,28 @@ void jit_power_static_emitter::emit_isa(const std::vector& in_vec_idxs, h->add(h->rsp, h->rbx); // restore vector registers - for (size_t i = get_max_vecs_count() + 1; i >= 2; --i) + for (size_t i = get_max_vecs_count() + 1; i >= 2; --i) { h->uni_vmovups(Vmm(i - 2), h->ptr[h->rsp + i * get_vec_length()]); + } h->uni_vmovups(vmm_dst, h->ptr[h->rsp + 0 * get_vec_length()]); h->add(h->rsp, (get_max_vecs_count() + 2) * get_vec_length()); // restore k registers if (isa == x64::avx512_core) { for (int i = n_k_regs_to_save - 1; i >= 0; --i) { - if (x64::mayiuse(x64::avx512_core)) + if (x64::mayiuse(x64::avx512_core)) { h->kmovq(Opmask(i), h->ptr[h->rsp + i * k_mask_size]); - else + } else { h->kmovw(Opmask(i), h->ptr[h->rsp + i * k_mask_size]); + } } h->add(h->rsp, n_k_regs_to_save * k_mask_size); } // restore gpr registers - for (int i = n_gprs_to_save - 1; i >= 0; --i) + for (int i = n_gprs_to_save - 1; i >= 0; --i) { h->mov(gprs_to_save[i], h->ptr[h->rsp + i * gpr_size]); + } h->add(h->rsp, n_gprs_to_save * gpr_size); } } @@ -1933,8 +1956,9 @@ void jit_prelu_emitter::emit_isa(const std::vector& in_vec_idxs, h->cmpps(vmm_aux0, vmm_src0, _cmp_gt_os); h->movups(vmm_aux1, vmm_src1); h->mulps(vmm_aux1, vmm_src0); - if (vmm_src0.getIdx() != vmm_dst.getIdx()) + if (vmm_src0.getIdx() != vmm_dst.getIdx()) { h->movups(vmm_dst, vmm_src0); + } h->blendvps(vmm_dst, vmm_aux1); } else if (isa == x64::avx2) { h->vmulps(vmm_aux0, vmm_src0, vmm_src1); @@ -1943,8 +1967,9 @@ void jit_prelu_emitter::emit_isa(const std::vector& in_vec_idxs, h->vblendvps(vmm_dst, vmm_aux0, vmm_src0, 
vmm_aux1); } else if (isa == x64::avx512_core) { h->vxorpd(vmm_aux0, vmm_aux0, vmm_aux0); - if (vmm_src0.getIdx() != vmm_dst.getIdx()) + if (vmm_src0.getIdx() != vmm_dst.getIdx()) { h->vmovups(vmm_dst, vmm_src0); + } h->vcmpps(k_mask, vmm_src0, vmm_aux0, _cmp_lt_os); h->vmulps(vmm_dst | k_mask, vmm_src0, vmm_src1); } @@ -2531,12 +2556,13 @@ std::set> jit_select_emitter::get_supported_precision } size_t jit_select_emitter::aux_vecs_count() const { - if (host_isa_ == x64::avx512_core) + if (host_isa_ == x64::avx512_core) { return 0; - else if (host_isa_ == x64::avx2) // tmp vec for mask + } else if (host_isa_ == x64::avx2) { // tmp vec for mask return 1; - else // mask should be xmm0 on sse41 + tmp vec for mask + } else { // mask should be xmm0 on sse41 + tmp vec for mask return 2; + } } void jit_select_emitter::emit_impl(const std::vector& in_vec_idxs, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_emitter.cpp index 23b6192d7b0918..95f9d2a446867a 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_emitter.cpp @@ -73,18 +73,21 @@ void jit_emitter::emitter_preamble(const std::vector& in_idxs, bool is_vec_output = (in_out_type_ == emitter_in_out_map::vec_to_vec) || (in_out_type_ == emitter_in_out_map::gpr_to_vec); - for (auto idx : pool_vec_idxs) + for (auto idx : pool_vec_idxs) { aux_vec_idxs.push_back(idx); + } // For sse41 mask register has to be Xmm(0) if (host_isa_ == cpu::x64::sse41 && aux_vecs_count() > 0) { size_t idx = 0; - if (is_vec_input) + if (is_vec_input) { OV_CPU_JIT_EMITTER_ASSERT(std::find(in_idxs.begin(), in_idxs.end(), idx) == in_idxs.end(), "Xmm(0) cannot be input register in SSE41"); - if (is_vec_output) + } + if (is_vec_output) { OV_CPU_JIT_EMITTER_ASSERT(std::find(out_idxs.begin(), out_idxs.end(), idx) == out_idxs.end(), "Xmm(0) cannot be output register in SSE41"); + } if 
(std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) == aux_vec_idxs.end()) { aux_vec_idxs.push_back(idx); preserved_vec_idxs.push_back(idx); @@ -102,57 +105,71 @@ void jit_emitter::emitter_preamble(const std::vector& in_idxs, } for (size_t idx = 0; idx < get_max_vecs_count(); idx++) { - if (aux_vec_idxs.size() >= aux_vecs_count()) + if (aux_vec_idxs.size() >= aux_vecs_count()) { break; + } if (is_vec_input) { - if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) + if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) { continue; + } } if (is_vec_output) { - if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) + if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) { continue; + } } - if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) + if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) { continue; - if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) + } + if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) { continue; + } aux_vec_idxs.push_back(idx); preserved_vec_idxs.push_back(idx); } - if (aux_vec_idxs.size() < aux_vecs_count()) + if (aux_vec_idxs.size() < aux_vecs_count()) { OV_CPU_JIT_EMITTER_THROW("Failed to allocate required number of vector registers"); + } // Same logic but to allocate gprs - for (auto idx : pool_gpr_idxs) + for (auto idx : pool_gpr_idxs) { aux_gpr_idxs.push_back(idx); + } for (size_t gpr_idx = 0; gpr_idx <= Operand::R15; ++gpr_idx) { size_t _idx = Operand::R15 - gpr_idx; // we allocate from the end - if (aux_gpr_idxs.size() >= aux_gprs_count()) + if (aux_gpr_idxs.size() >= aux_gprs_count()) { break; - if (_idx == Operand::RSP) + } + if (_idx == Operand::RSP) { continue; + } if (!is_vec_input) { - if (std::find(in_idxs.begin(), in_idxs.end(), _idx) != in_idxs.end()) + if (std::find(in_idxs.begin(), 
in_idxs.end(), _idx) != in_idxs.end()) { continue; + } } if (!is_vec_output) { - if (std::find(out_idxs.begin(), out_idxs.end(), _idx) != out_idxs.end()) + if (std::find(out_idxs.begin(), out_idxs.end(), _idx) != out_idxs.end()) { continue; + } } - if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) + if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) { continue; - if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) + } + if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) { continue; + } aux_gpr_idxs.push_back(_idx); preserved_gpr_idxs.push_back(_idx); } - if (aux_gpr_idxs.size() < aux_gprs_count()) + if (aux_gpr_idxs.size() < aux_gprs_count()) { OV_CPU_JIT_EMITTER_THROW("Failed to allocate required number of general-purpose registers"); + } if (!entry_map_.empty()) { // last aux_gpr_idx is for p_table, we can use aux_gpr_idxs from idx 0 for other purpose @@ -160,31 +177,37 @@ void jit_emitter::emitter_preamble(const std::vector& in_idxs, aux_gpr_idxs.erase(aux_gpr_idxs.end() - 1); } - for (size_t i = 0; i < preserved_gpr_idxs.size(); ++i) + for (size_t i = 0; i < preserved_gpr_idxs.size(); ++i) { h->push(Reg64(preserved_gpr_idxs[i])); + } - if (preserved_vec_idxs.size()) + if (preserved_vec_idxs.size()) { h->sub(h->rsp, preserved_vec_idxs.size() * get_vec_length()); + } for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) { push_vec(h->ptr[h->rsp + i * get_vec_length()], preserved_vec_idxs[i]); } - if (!entry_map_.empty()) + if (!entry_map_.empty()) { load_table_addr(); + } } void jit_emitter::emitter_postamble() const { using namespace Xbyak::util; - for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) + for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) { pop_vec(preserved_vec_idxs[i], h->ptr[h->rsp + i * get_vec_length()]); + } - if (preserved_vec_idxs.size()) + if 
(preserved_vec_idxs.size()) { h->add(h->rsp, preserved_vec_idxs.size() * get_vec_length()); + } - for (int i = preserved_gpr_idxs.size() - 1; i >= 0; --i) + for (int i = preserved_gpr_idxs.size() - 1; i >= 0; --i) { h->pop(Reg64(preserved_gpr_idxs[i])); + } preserved_vec_idxs.clear(); preserved_gpr_idxs.clear(); @@ -204,8 +227,9 @@ void jit_emitter::emit_data() const { for (auto it = entry_map_.begin(); it != entry_map_.end(); it++) { const auto& te = (*it).second; // get map entry for a given key const auto len = te.bcast ? get_vec_length() : sizeof(table_entry_val_t); - for (size_t d = 0; d < len; d += sizeof(table_entry_val_t)) + for (size_t d = 0; d < len; d += sizeof(table_entry_val_t)) { h->dd(te.val); + } } } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp index 72384674edd97e..5543899c92dc24 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp @@ -71,8 +71,9 @@ static int get_aux_regs_as_temp(const int elem_count, bool is_store_as_real16, const int avx512_threshold_for_mask = 0, const bool is_fill = false) { - if (mayiuse(cpu::x64::avx512_core) && is_fill) + if (mayiuse(cpu::x64::avx512_core) && is_fill) { return 1; + } // for pure move, there are direct no-mask instructions to move on full xmm/ymm/zmm, so aux_gpr is not needed. // for move+convert: // there are direct no-mask instructions to load i8/u8/i16/u16/bf16/fp16 to full xmm/ymm/zmm as f32/i32, so aux_gpr @@ -84,11 +85,13 @@ static int get_aux_regs_as_temp(const int elem_count, // so aux_gpr is not needed. 
const int byte_size = elem_count * data_size; if ((is_pure_move && one_of(byte_size, 16, 32, 64)) || - (!is_pure_move && one_of(elem_count, 4, 8, 16) && !is_store_as_real16)) + (!is_pure_move && one_of(elem_count, 4, 8, 16) && !is_store_as_real16)) { return 0; + } if ((mayiuse(cpu::x64::avx512_core) && (byte_size > avx512_threshold_for_mask)) || - (one_of(byte_size % 16, 1, 2, 3))) + (one_of(byte_size % 16, 1, 2, 3))) { return 1; + } return 0; } @@ -130,8 +133,9 @@ size_t jit_load_emitter::aux_gprs_count() const { is_fill_); // 1 for table address - if (is_fill_) + if (is_fill_) { count++; + } return count; } @@ -193,8 +197,9 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64& reg_src, const int out_vec_i if (src_prc_ != dst_prc_) { switch (dst_prc_) { case ov::element::f32: - if (!src_prc_.is_real()) + if (!src_prc_.is_real()) { h->uni_vcvtdq2ps(Vmm(out_vec_idx), Vmm(out_vec_idx)); + } break; case ov::element::i32: if (src_prc_.is_real()) { @@ -235,13 +240,16 @@ void jit_load_emitter::load_bytes(const Vmm& vmm, const Xbyak::Reg64& reg, int o MAYBE_UNUSED(is_zmm); // Ensure data fits completely inside the Xmm/Ymm/Zmm register - if (load_size < 0 || load_size > 64) + if (load_size < 0 || load_size > 64) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load in load_byte."); + } // check if proper number bytes fit inside the Xmm/Ymm register - if (is_ymm && load_size > 32) + if (is_ymm && load_size > 32) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load to ymm in load_byte."); - if (is_xmm && load_size > 16) + } + if (is_xmm && load_size > 16) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load to xmm in load_byte."); + } auto xmm = Xbyak::Xmm(vmm.getIdx()); auto ymm = Xbyak::Ymm(vmm.getIdx()); @@ -290,10 +298,11 @@ void jit_load_emitter::load_bytes(const Vmm& vmm, const Xbyak::Reg64& reg, int o if (!one_of(bytes_to_load, 0, 1, 2, 3, 4, 8, 16)) { h->uni_vpxor(vmm, vmm, vmm); } - if (bytes_to_load >= 8 && bytes_to_load 
< 16) + if (bytes_to_load >= 8 && bytes_to_load < 16) { h->uni_vmovq(xmm, addr(start_bytes)); - else if (bytes_to_load == 16) + } else if (bytes_to_load == 16) { h->uni_vmovdqu(xmm, addr(start_bytes)); + } switch (bytes_to_load) { case 0: @@ -364,10 +373,11 @@ void jit_load_emitter::load_bytes(const Vmm& vmm, const Xbyak::Reg64& reg, int o if (has_xmm_block) { h->vinsertf128(ymm, ymm, xmm, 1); // insert to upper bits of ymm - if (has_ymm_block) + if (has_ymm_block) { h->vinsertf128(ymm, ymm, addr(32), 0); // insert to lower bits of ymm - else + } else { h->vinsertf128(ymm, ymm, addr(0), 0); // insert to lower bits of ymm + } } if (has_ymm_block) { @@ -437,40 +447,46 @@ void jit_load_emitter::load_bytes_to_dword_extension(const Vmm& vmm, // Ensure extended double words fit inside Zmm (32 * load_size <= 512) // For Ymm register, load capacity is halved (32 * load_size <= 256) // For Xmm register, load capacity is halved further (32 * load_size <= 128) - if (load_size < 0 || load_size > 16) + if (load_size < 0 || load_size > 16) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load in load_bytes_to_dword_extension."); - if (is_ymm && load_size > 8) + } + if (is_ymm && load_size > 8) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load to ymm in load_bytes_to_dword_extension."); - if (is_xmm && load_size > 4) + } + if (is_xmm && load_size > 4) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load to xmm in load_bytes_to_dword_extension."); + } // For load_size == 4/8/16, do load/extension in one go switch (load_size) { case 16: { // full size of zmm const auto zmm = Xbyak::Zmm(vmm.getIdx()); - if (is_signed) + if (is_signed) { h->uni_vpmovsxbd(zmm, ptr[reg + offset]); - else + } else { h->uni_vpmovzxbd(zmm, ptr[reg + offset]); + } break; } case 8: { // full size of ymm or ymm_block of zmm const auto ymm = Xbyak::Ymm(vmm.getIdx()); - if (is_signed) + if (is_signed) { h->uni_vpmovsxbd(ymm, ptr[reg + offset]); - else + } else { 
h->uni_vpmovzxbd(ymm, ptr[reg + offset]); + } break; } case 4: { // full size of xmm or xmm_block of ymm/zmm const auto xmm = Xbyak::Xmm(vmm.getIdx()); - if (is_signed) + if (is_signed) { h->uni_vpmovsxbd(xmm, ptr[reg + offset]); - else + } else { h->uni_vpmovzxbd(xmm, ptr[reg + offset]); + } break; } default: { @@ -479,17 +495,19 @@ void jit_load_emitter::load_bytes_to_dword_extension(const Vmm& vmm, mask = (mask << load_size) - mask; h->mov(Reg32(aux_gpr_idxs[0]), mask); h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); - if (is_signed) + if (is_signed) { h->uni_vpmovsxbd(vmm | k_mask | T_z, ptr[reg + offset]); - else + } else { h->uni_vpmovzxbd(vmm | k_mask | T_z, ptr[reg + offset]); + } } else { const auto xmm = Xbyak::Xmm(vmm.getIdx()); load_bytes(xmm, reg, offset, load_size); - if (is_signed) + if (is_signed) { h->uni_vpmovsxbd(vmm, xmm); - else + } else { h->uni_vpmovzxbd(vmm, xmm); + } } break; } @@ -532,18 +550,22 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm& vmm, bool is_f16 = (prc == ov::element::f16); bool is_signed = prc.is_signed(); - if (is_f16 && !mayiuse(cpu::x64::avx2)) + if (is_f16 && !mayiuse(cpu::x64::avx2)) { OV_CPU_JIT_EMITTER_THROW("only support fp16 on platform with avx2 or above."); + } // Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512) // For Ymm register, load capacity is halved (16/2(num) * 32 <= 128) // For Xmm register, load capacity is halved again (8/2(num) * 32 <= 128) - if (load_size < 0 || load_size > 32) + if (load_size < 0 || load_size > 32) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load in load_words_to_dword_extension."); - if (is_ymm && load_size > 16) + } + if (is_ymm && load_size > 16) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load to ymm in load_words_to_dword_extension."); - if (is_xmm && load_size > 8) + } + if (is_xmm && load_size > 8) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to load to xmm in load_words_to_dword_extension."); 
+ } auto xmm = Xbyak::Xmm(vmm.getIdx()); auto ymm = Xbyak::Ymm(vmm.getIdx()); @@ -559,10 +581,11 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm& vmm, } else if (is_f16) { h->vcvtph2ps(zmm, ptr[reg + offset]); } else { - if (is_signed) + if (is_signed) { h->uni_vpmovsxwd(zmm, ptr[reg + offset]); - else + } else { h->uni_vpmovzxwd(zmm, ptr[reg + offset]); + } } break; } @@ -573,10 +596,11 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm& vmm, } else if (is_f16) { h->vcvtph2ps(ymm, ptr[reg + offset]); } else { - if (is_signed) + if (is_signed) { h->uni_vpmovsxwd(ymm, ptr[reg + offset]); - else + } else { h->uni_vpmovzxwd(ymm, ptr[reg + offset]); + } } break; } @@ -587,10 +611,11 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm& vmm, } else if (is_f16) { h->vcvtph2ps(xmm, ptr[reg + offset]); } else { - if (is_signed) + if (is_signed) { h->uni_vpmovsxwd(xmm, ptr[reg + offset]); - else + } else { h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + } } break; } @@ -606,10 +631,11 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm& vmm, } else if (is_f16) { h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]); } else { - if (is_signed) + if (is_signed) { h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]); - else + } else { h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + } } } else { // xmm or ymm version @@ -620,10 +646,11 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm& vmm, } else if (is_f16) { h->vcvtph2ps(ymm, xmm); } else { - if (is_signed) + if (is_signed) { h->uni_vpmovsxwd(vmm, xmm); - else + } else { h->uni_vpmovzxwd(vmm, xmm); + } } } break; @@ -705,8 +732,9 @@ size_t jit_store_emitter::aux_gprs_count() const { threshold_for_mask_emu_store); // for table value in truncation arithmetic mode - if (is_truncation_emulation()) + if (is_truncation_emulation()) { count++; + } return count; } @@ -716,17 +744,20 @@ size_t jit_store_emitter::aux_vecs_count() const { // to avoid src 
vmm pollution for data type conversion // and other vmm data pollution instructions - if (src_prc_ != dst_prc_ || !one_of(store_size_, 64, 32, 16)) + if (src_prc_ != dst_prc_ || !one_of(store_size_, 64, 32, 16)) { count++; + } // for data swapping to avoid using Xmm(0) as I/O xmm for jit_uni_vcvtneps2bf16 - if ((host_isa_ == cpu::x64::sse41) && (src_prc_ == ov::element::f32 && dst_prc_ == ov::element::bf16)) + if ((host_isa_ == cpu::x64::sse41) && (src_prc_ == ov::element::f32 && dst_prc_ == ov::element::bf16)) { count++; + } // zero value, zeroed and passed from caller from performance standpoint(zeroed one time and not need preserve and // restore status) - if (mayiuse(cpu::x64::avx512_core) && one_of(dst_prc_, ov::element::u8, ov::element::u16)) + if (mayiuse(cpu::x64::avx512_core) && one_of(dst_prc_, ov::element::u8, ov::element::u16)) { count++; + } return count; } @@ -737,8 +768,9 @@ size_t jit_store_emitter::get_inputs_num() const { void jit_store_emitter::emit_data() const { jit_emitter::emit_data(); - if (uni_vcvtneps2bf16_) + if (uni_vcvtneps2bf16_) { uni_vcvtneps2bf16_->emit_data(); + } } void jit_store_emitter::emit_impl(const std::vector& in_idxs, const std::vector& out_idxs) const { @@ -771,8 +803,9 @@ void jit_store_emitter::emit_isa(const int in_vec_idx, const Xbyak::Reg64& reg_d data_idx = in_vec_idx; data_reg_updated = false; - if (!aux_vec_idxs.empty()) + if (!aux_vec_idxs.empty()) { aux_src_idx = aux_vec_idxs.back(); // to avoid src pollution + } if (src_prc_ != dst_prc_) { switch (src_prc_) { case ov::element::f32: @@ -849,12 +882,15 @@ void jit_store_emitter::store_bytes(const Xbyak::Reg64& reg, int offset, int sto MAYBE_UNUSED(is_zmm); // Ensure data fits completely inside the Xmm/Ymm/Zmm register - if (store_size < 0 || store_size > 64) + if (store_size < 0 || store_size > 64) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store in store_bytes."); - if (is_ymm && store_size > 32) + } + if (is_ymm && store_size > 32) { 
OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store to ymm in store_bytes."); - if (is_xmm && store_size > 16) + } + if (is_xmm && store_size > 16) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store to xmm in store_bytes."); + } auto xmm = Xbyak::Xmm(data_idx); auto ymm = Xbyak::Ymm(data_idx); @@ -885,17 +921,19 @@ void jit_store_emitter::store_bytes(const Xbyak::Reg64& reg, int offset, int sto STORE_KEEP_SOURCE(vextractf128, xmm, Xmm(aux_src_idx), ymm, 1); } - if (bytes_to_store >= 8 && bytes_to_store < 16) + if (bytes_to_store >= 8 && bytes_to_store < 16) { h->uni_vmovq(addr(start_bytes), xmm); - else if (bytes_to_store == 16) + } else if (bytes_to_store == 16) { h->uni_vmovdqu(addr(start_bytes), xmm); + } // 64/32/16/8 with one go // tail 7 bytes for lower or upper xmm auto store_one_byte = [&](int bytes_offset, int gpr_idx) { bool ext8bit = false; - if (one_of(gpr_idx, Operand::RSP, Operand::RBP, Operand::RSI, Operand::RDI)) + if (one_of(gpr_idx, Operand::RSP, Operand::RBP, Operand::RSI, Operand::RDI)) { ext8bit = true; + } h->mov(addr(start_bytes + bytes_offset), Reg8(gpr_idx, ext8bit)); }; switch (bytes_to_store) { @@ -1012,12 +1050,15 @@ void jit_store_emitter::store_dword_to_byte_extension(const Xbyak::Reg64& reg, // Ensure data fits completely inside the Xmm/Ymm/Zmm register // At most 8 dwords can fit inside the Ymm register // At most 4 dwords can fit inside the Xmm register - if (store_num < 0 || store_num > 16) + if (store_num < 0 || store_num > 16) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store in store_dword_to_byte_extension."); - if (is_ymm && store_num > 8) + } + if (is_ymm && store_num > 8) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store to ymm in store_dword_to_byte_extension."); - if (is_xmm && store_num > 4) + } + if (is_xmm && store_num > 4) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store to xmm in store_dword_to_byte_extension."); + } auto vmm = 
Vmm(data_idx); auto zmm = Xbyak::Zmm(data_idx); @@ -1179,12 +1220,15 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64& reg, // Ensure data fits completely inside the Xmm/Ymm/Zmm register // At most 4 dwords can fit inside the Xmm register // At most 8 dwords can fit inside the Ymm register - if (store_num < 0 || store_num > 16) + if (store_num < 0 || store_num > 16) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store in store_dword_to_word_extension."); - if (is_ymm && store_num > 8) + } + if (is_ymm && store_num > 8) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store to ymm in store_dword_to_word_extension."); - if (is_xmm && store_num > 4) + } + if (is_xmm && store_num > 4) { OV_CPU_JIT_EMITTER_THROW("has unexpected number of values to store to xmm in store_dword_to_word_extension."); + } auto xmm = Xbyak::Xmm(data_idx); auto ymm = Xbyak::Ymm(data_idx); diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/utils.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/utils.cpp index 0001eac9fd272d..e37b59abe7bbba 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/utils.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/utils.cpp @@ -39,21 +39,25 @@ struct regs_to_spill { static std::vector get(const std::set& live_regs) { std::vector regs_to_spill; auto push_if_live = [&live_regs, &regs_to_spill](Xbyak::Reg&& reg) { - if (live_regs.empty() || live_regs.count(Xbyak2SnippetsReg(reg))) + if (live_regs.empty() || live_regs.count(Xbyak2SnippetsReg(reg))) { regs_to_spill.emplace_back(reg); + } }; for (int i = 0; i < 16; i++) { // do not spill rsp; - if (i != Xbyak::Reg::RSP) + if (i != Xbyak::Reg::RSP) { push_if_live(Reg64(i)); + } } - for (int i = 0; i < cpu_isa_traits::n_vregs; ++i) + for (int i = 0; i < cpu_isa_traits::n_vregs; ++i) { push_if_live(typename cpu_isa_traits::Vmm(i)); + } const int num_k_mask = isa == cpu_isa_t::avx512_core ? 
8 : 0; - for (int i = 0; i < num_k_mask; ++i) + for (int i = 0; i < num_k_mask; ++i) { push_if_live(Xbyak::Opmask(i)); + } return regs_to_spill; } }; @@ -213,12 +217,15 @@ cpu_isa_t EmitABIRegSpills::get_isa() { // e.g. other emitters isa is avx512, while this emitter isa is avx2, and internal call is used. Internal call may // use avx512 and spoil k-reg, ZMM. do not care about platform w/ avx512_common but w/o avx512_core(knight landing), // which is obsoleted. - if (mayiuse(cpu_isa_t::avx512_core)) + if (mayiuse(cpu_isa_t::avx512_core)) { return cpu_isa_t::avx512_core; - if (mayiuse(cpu_isa_t::avx2)) + } + if (mayiuse(cpu_isa_t::avx2)) { return cpu_isa_t::avx2; - if (mayiuse(cpu_isa_t::sse41)) + } + if (mayiuse(cpu_isa_t::sse41)) { return cpu_isa_t::sse41; + } OV_CPU_JIT_EMITTER_THROW("unsupported isa"); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp index a915fb0fe17e21..eddc0d5d97fdc4 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp @@ -246,8 +246,9 @@ std::vector CPUTargetMachine::get_abi_arg_regs() const { using namespace dnnl::impl::cpu::aarch64; std::vector res; for (const auto& r : - {abi_param1, abi_param2, abi_param3, abi_param4, abi_param5, abi_param6, abi_param7, abi_param8}) + {abi_param1, abi_param2, abi_param3, abi_param4, abi_param5, abi_param6, abi_param7, abi_param8}) { res.emplace_back(snippets::RegType::gpr, r.getIdx()); + } return res; } @@ -257,8 +258,9 @@ std::vector CPUTargetMachine::get_gp_reg_pool() const { std::vector reg_pool; for (size_t i = 0; i < num_gp_regs; i++) { // Note: more details on the usage of reserved registers in aarch64/jit_kernel_emitter.cpp - if (!one_of(i, Operand::SP, Operand::X18, Operand::X23, Operand::X24, Operand::X28, Operand::X29)) + if (!one_of(i, Operand::SP, Operand::X18, Operand::X23, 
Operand::X24, Operand::X28, Operand::X29)) { reg_pool.emplace_back(snippets::RegType::gpr, i); + } } return reg_pool; } @@ -273,8 +275,10 @@ std::vector CPUTargetMachine::get_vec_reg_pool() const { } }(); std::vector reg_pool; - for (int i = 0; i < num_vec_regs; i++) + reg_pool.reserve(num_vec_regs); + for (int i = 0; i < num_vec_regs; i++) { reg_pool.emplace_back(snippets::RegType::vec, static_cast(i)); + } return reg_pool; } @@ -295,10 +299,11 @@ std::shared_ptr CPUGenerator::clone() const { ov::snippets::RegType CPUGenerator::get_specific_op_out_reg_type(const ov::Output& out) const { const auto op = out.get_node_shared_ptr(); - if (ov::as_type_ptr(op) || ov::as_type_ptr(op)) + if (ov::as_type_ptr(op) || ov::as_type_ptr(op)) { return ov::snippets::RegType::vec; - else + } else { return ov::snippets::RegType::undefined; + } } bool CPUGenerator::uses_precompiled_kernel(const std::shared_ptr& e) const { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp index 053cebe747e529..8a24cb6eed8b1d 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp @@ -27,15 +27,17 @@ jit_fill_emitter::jit_fill_emitter(jit_generator* h, cpu_isa_t isa, const Expres offset = fill->get_offset(); fill_value = fill->get_fill_value(); - if (!is_optimized()) + if (!is_optimized()) { push_arg_entry_of("value", fill_value, true); + } prepare_table(); } size_t jit_fill_emitter::get_aux_gprs_count() const { // Optimized version (fill full vector by zero) doesn't need additional register - if (is_optimized()) + if (is_optimized()) { return 0; + } return 1; } @@ -50,10 +52,11 @@ void jit_fill_emitter::emit_impl(const std::vector& in, const std::vecto template void jit_fill_emitter::emit_isa(const std::vector& in, const std::vector& out) const { - if (is_full_reg()) + if 
(is_full_reg()) { fill_full(out); - else + } else { fill_tail(in, out); + } } template diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.hpp index 7c827e4920d5eb..73df8c4891be0f 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.hpp @@ -37,7 +37,7 @@ class jit_fill_emitter : public jit_emitter { return offset == 0; } bool is_optimized() const { - return is_full_reg() && fill_value == uint32_t(0x0); + return is_full_reg() && fill_value == static_cast(0x0); } size_t offset = 0; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp index 417e33d339816c..fbe2aeb03ace2d 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_kernel_emitter.cpp @@ -83,10 +83,12 @@ jit_kernel_emitter::jit_kernel_emitter(jit_generator* h, num_outputs = results.size(); std::vector data_ptr_regs; data_ptr_regs.reserve(num_inputs + num_outputs); - for (const auto& param : parameters) + for (const auto& param : parameters) { data_ptr_regs.push_back(param->get_output_port_descriptor(0)->get_reg()); - for (const auto& result : results) + } + for (const auto& result : results) { data_ptr_regs.push_back(result->get_input_port_descriptor(0)->get_reg()); + } std::set unique_buffers; for (const auto& buffer_expr : buffers) { @@ -137,8 +139,9 @@ void jit_kernel_emitter::emit_impl(const std::vector& in, const std::vec std::inserter(available_gpr, available_gpr.begin()), convert); // Note: data_ptr regs are globally live, so it makes no sense to keep them in the pool - for (auto idx : data_ptr_regs_idx) + for (auto idx : data_ptr_regs_idx) { available_gpr.erase({snippets::RegType::gpr, 
idx}); + } reg_type = snippets::RegType::vec; std::transform(aux_vec_idxs.begin(), aux_vec_idxs.end(), @@ -172,12 +175,14 @@ void jit_kernel_emitter::emit_impl(const std::vector& in, const std::vec const auto& node = expression->get_node(); // Note: currently only a few operations are allowed to have mixed in/out register types => skip validation here if (!ov::is_type(node) && !ov::is_type(node) && - !std::dynamic_pointer_cast(emitter)) + !std::dynamic_pointer_cast(emitter)) { std::tie(expected_in_type, expected_out_type) = get_expected_reg_types(emitter); + } // Note: live regs = regs live on input of the expression. We also need to exclude output regs from the pool auto live_regs = expression->get_live_regs(); - for (auto r : reg_info.second) + for (auto r : reg_info.second) { live_regs.insert(r); + } std::vector pool_gp_reg; std::vector pool_vec_reg; std::set_difference(available_gpr.begin(), @@ -247,12 +252,13 @@ void jit_kernel_static_emitter::init_data_pointers(const std::vector& arg_ ptr(reg_runtime_params, static_cast(GET_OFF(buffer_scratchpad_ptr)))); } for (size_t i = 0; i < num_params; i++) { - if (i < num_inputs) + if (i < num_inputs) { h->ldr(data_ptr_regs[i], ptr(reg_runtime_params, static_cast(GET_OFF(src_ptrs) + i * sizeof(void*)))); - else + } else { h->ldr(data_ptr_regs[i], ptr(reg_runtime_params, static_cast(GET_OFF(dst_ptrs) + (i - num_inputs) * sizeof(void*)))); + } init_ptr_with_offset(data_ptr_regs[i], data_offsets[i]); } } @@ -276,12 +282,13 @@ void jit_kernel_dynamic_emitter::init_data_pointers(const std::vector& arg ptr(reg_runtime_params, static_cast(GET_OFF(buffer_scratchpad_ptr)))); } for (size_t i = 0; i < num_params; i++) { - if (i < num_inputs) + if (i < num_inputs) { h->ldr(data_ptr_regs[i], ptr(reg_runtime_params, static_cast(GET_OFF(src_ptrs) + i * sizeof(void*)))); - else + } else { h->ldr(data_ptr_regs[i], ptr(reg_runtime_params, static_cast(GET_OFF(dst_ptrs) + (i - num_inputs) * sizeof(void*)))); + } } } diff --git 
a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp index 9e3dc9674079b1..aedeb072e7d791 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp @@ -141,8 +141,9 @@ void jit_loop_end_emitter::emit_impl(const std::vector& in, const std::v XReg reg_work_amount = XReg(in.back()); if (!evaluate_once) { for (size_t idx = 0; idx < data_ptr_reg_idxs.size(); idx++) { - if (!is_incremented[idx] || ptr_increments[idx] == 0) + if (!is_incremented[idx] || ptr_increments[idx] == 0) { continue; + } XReg data_reg = XReg(data_ptr_reg_idxs[idx]); if (ptr_increments[idx] > 0) { h->add_imm(data_reg, data_reg, ptr_increments[idx] * wa_increment * data_sizes[idx], h->X_TMP_0); @@ -156,8 +157,9 @@ void jit_loop_end_emitter::emit_impl(const std::vector& in, const std::v } for (size_t idx = 0; idx < data_ptr_reg_idxs.size(); idx++) { - if (!is_incremented[idx] || finalization_offsets[idx] == 0) + if (!is_incremented[idx] || finalization_offsets[idx] == 0) { continue; + } XReg data_reg = XReg(static_cast(data_ptr_reg_idxs[idx])); if (finalization_offsets[idx] > 0) { h->add_imm(data_reg, data_reg, finalization_offsets[idx] * data_sizes[idx], h->X_TMP_0); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp index 3ad41d707bb96b..639b54d8ade4c3 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp @@ -53,8 +53,9 @@ void CPURuntimeConfigurator::initialization(const ov::snippets::lowered::LinearI void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRCPtr& linear_ir) { RuntimeConfigurator::update(linear_ir); - if (linear_ir->is_dynamic()) + if 
(linear_ir->is_dynamic()) { update_loop_args(linear_ir); + } } void CPURuntimeConfigurator::update_tensor_rank(const ov::snippets::VectorDims& master_shape) const { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/utils/debug_caps_config.cpp b/src/plugins/intel_cpu/src/emitters/snippets/utils/debug_caps_config.cpp index e4c3c40e1d8120..8350de5f56840f 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/utils/debug_caps_config.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/utils/debug_caps_config.cpp @@ -14,7 +14,7 @@ void SnippetsDebugCapsConfig::readProperties() { if (env && *env) return env; - return (const char*)nullptr; + return static_cast(nullptr); }; enable_segfault_detector = readEnv("OV_CPU_SNIPPETS_SEGFAULT_DETECTOR") ? true : false; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index 5e4a8992aa7165..32d3d2b68c11b8 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -344,8 +344,9 @@ size_t intel_cpu::CPUTargetMachine::get_lanes() const { std::vector intel_cpu::CPUTargetMachine::get_abi_arg_regs() const { const auto& abi_regs = dnnl::impl::cpu::x64::abi_param_regs; std::vector res; - for (const auto& r : abi_regs) + for (const auto& r : abi_regs) { res.emplace_back(snippets::RegType::gpr, r); + } return res; } @@ -353,8 +354,9 @@ std::vector intel_cpu::CPUTargetMachine::get_gp_reg_pool() const const auto num_gp_regs = 16; std::vector reg_pool; for (size_t i = 0; i < num_gp_regs; i++) { - if (!one_of(i, Xbyak::Operand::RSP)) + if (!one_of(i, Xbyak::Operand::RSP)) { reg_pool.emplace_back(snippets::RegType::gpr, i); + } } return reg_pool; } @@ -374,8 +376,9 @@ std::vector intel_cpu::CPUTargetMachine::get_vec_reg_pool() const }(); std::vector reg_pool; reg_pool.reserve(num_vec_regs); - for (int i = 0; i < num_vec_regs; i++) + for (int i = 0; 
i < num_vec_regs; i++) { reg_pool.emplace_back(snippets::RegType::vec, static_cast(i)); + } return reg_pool; } @@ -433,12 +436,13 @@ ov::snippets::RegType intel_cpu::CPUGenerator::get_specific_op_out_reg_type(cons std::dynamic_pointer_cast(op) || is_type(op) || #endif - is_type(op)) + is_type(op)) { return ov::snippets::RegType::gpr; - else if (is_type(op) || is_type(op)) + } else if (is_type(op) || is_type(op)) { return ov::snippets::RegType::vec; - else + } else { return ov::snippets::RegType::undefined; + } } bool intel_cpu::CPUGenerator::uses_precompiled_kernel(const std::shared_ptr& e) const { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_binary_call_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_binary_call_emitter.cpp index 568e3dffab35e7..2cc1d2758578d1 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_binary_call_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_binary_call_emitter.cpp @@ -37,16 +37,18 @@ void jit_binary_call_emitter::init_binary_call_regs(size_t num_binary_args, OV_CPU_JIT_EMITTER_ASSERT(sizeof(abi_param_regs) / sizeof(*abi_param_regs) >= num_binary_args, "Requested number of runtime arguments is not supported"); // This regs will be corrupted, since we'll use them to pass runtime args - for (size_t i = 0; i < num_binary_args; i++) + for (size_t i = 0; i < num_binary_args; i++) { m_regs_to_spill.emplace(snippets::RegType::gpr, abi_param_regs[i]); + } // Note: aux_gpr idx must be non-empty because aux_gprs_count() returns 1 for this emitter OV_CPU_JIT_EMITTER_ASSERT(aux_gprs_count() >= 1, "Invalid aux_gpr count"); m_call_address_reg = Reg64(static_cast(aux_gpr_idxs.back())); aux_gpr_idxs.pop_back(); bool spill_required = false; m_callee_saved_reg = Reg64(static_cast(get_callee_saved_aux_gpr(aux_gpr_idxs, used_gpr_idxs, spill_required))); - if (spill_required) + if (spill_required) { m_regs_to_spill.emplace(snippets::RegType::gpr, m_callee_saved_reg.getIdx()); + } 
m_regs_initialized = true; } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp index 0bd6c02b42783e..f68ff469577720 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp @@ -68,8 +68,9 @@ void jit_brgemm_copy_b_emitter::validate_arguments(const std::vector& in void jit_brgemm_copy_b_emitter::emit_impl(const std::vector& in, const std::vector& out) const { validate_arguments(in, out); std::vector mem_ptrs_idxs{in[0], out[0]}; - if (out.size() > 1) + if (out.size() > 1) { mem_ptrs_idxs.emplace_back(out[1]); + } init_binary_call_regs(2, mem_ptrs_idxs); const Xbyak::Reg64& aux_reg = get_call_address_reg(); @@ -99,8 +100,9 @@ void jit_brgemm_copy_b_emitter::emit_impl(const std::vector& in, const s } // No scratchpad => need to write nullptr manually - if (!m_with_comp) + if (!m_with_comp) { h->mov(h->qword[h->rsp + args_offsets.back()], reinterpret_cast(nullptr)); + } h->mov(aux_reg, reinterpret_cast(BrgemmCopyBKernelExecutor::execute)); h->mov(abi_param1, reinterpret_cast(m_kernel_executor.get())); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp index ab6c9d0d0e567f..e79913a2b3f412 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.cpp @@ -71,8 +71,9 @@ std::set> jit_brgemm_emitter::get_supported_precision std::set> supported_types = {{element::u8, element::i8}, {element::bf16, element::bf16}, {element::f32, element::f32}}; - if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) { supported_types.insert({element::i8, element::i8}); + 
} return supported_types; } else if (brgemm->get_type() == BRGEMM_TYPE::WITH_COMPENSATIONS) { return {{element::i8, element::i8, element::f32}}; @@ -94,15 +95,17 @@ void jit_brgemm_emitter::emit_impl(const std::vector& in, const std::vec validate_arguments(in, out); std::vector mem_ptrs_idxs{in[0], in[1], out[0]}; init_binary_call_regs(2, mem_ptrs_idxs); - if (in.size() > 2) + if (in.size() > 2) { mem_ptrs_idxs.emplace_back(in[2]); + } - if (std::dynamic_pointer_cast(m_kernel_executor)) + if (std::dynamic_pointer_cast(m_kernel_executor)) { emit_call(mem_ptrs_idxs); - else if (std::dynamic_pointer_cast(m_kernel_executor)) + } else if (std::dynamic_pointer_cast(m_kernel_executor)) { emit_call(mem_ptrs_idxs); - else + } else { OV_CPU_JIT_EMITTER_THROW("uknown execuor type"); + } } template ::value, bool>::type> @@ -138,8 +141,9 @@ void jit_brgemm_emitter::emit_call(const std::vector& mem_ptrs_idxs) con } // No scratchpad => need to write nullptr manually - if (mem_ptrs.size() < 4) + if (mem_ptrs.size() < 4) { h->mov(h->qword[h->rsp + brgemm_args_offsets.back()], reinterpret_cast(nullptr)); + } // abi_param1 always contains jit_snippets_call_args which has amx tile config for each thread if (std::is_same()) { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp index 687917acbabc5a..9875493c6b50b5 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp @@ -22,18 +22,21 @@ jit_fill_emitter::jit_fill_emitter(dnnl::impl::cpu::x64::jit_generator* h, offset = fill->get_offset(); fill_value = fill->get_fill_value(); - if (!is_optimized()) + if (!is_optimized()) { push_arg_entry_of("value", fill_value, true); + } prepare_table(); } size_t jit_fill_emitter::aux_gprs_count() const { // Optimized version (fill full vector by zero) doesn't need additional register - if 
(is_optimized()) + if (is_optimized()) { return 0; + } // + 1 reg for table value in full vector case - if (is_full_reg()) + if (is_full_reg()) { return 1; + } // + 1 reg for temp reg for mask in avx512 return one_of(host_isa_, dnnl::impl::cpu::x64::avx512_core) ? 2 : 1; } @@ -65,8 +68,9 @@ void jit_fill_emitter::emit_isa(const std::vector& in, const std::vector // removed from the LIR // TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic. // Ticket: 126270 - if (src_vmm.getIdx() != dst_vmm.getIdx()) + if (src_vmm.getIdx() != dst_vmm.getIdx()) { h->uni_vmovups(dst_vmm, src_vmm); + } } else if (is_full_reg()) { fill_full(dst_vmm); } else { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.hpp index 23b929cc161ca7..59418189267baa 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.hpp @@ -36,7 +36,7 @@ class jit_fill_emitter : public jit_emitter { return offset == 0; } bool is_optimized() const { - return is_full_reg() && fill_value == uint32_t(0x0); + return is_full_reg() && fill_value == static_cast(0x0); } size_t offset = 0; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_horizon_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_horizon_emitter.cpp index 34e9c2f71fd148..46aacdad9475f5 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_horizon_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_horizon_emitter.cpp @@ -48,8 +48,9 @@ void jit_horizon_emitter::emit_isa(const std::vector& in, const std::vec Vmm dst_vmm = Vmm(out[0]); Vmm aux_vmm = Vmm(aux_vec_idxs[0]); - if (in[0] != out[0]) + if (in[0] != out[0]) { h->uni_vmovups(dst_vmm, src_vmm); + } if (isa == dnnl::impl::cpu::x64::avx512_core) { Xbyak::Zmm dst_zmm = Xbyak::Zmm(out[0]); Xbyak::Zmm aux_zmm = 
Xbyak::Zmm(aux_vec_idxs[0]); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp index d81f0937f48aba..cd46da410aae28 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp @@ -31,10 +31,12 @@ jit_kernel_emitter::jit_kernel_emitter(jit_generator* h, num_outputs = results.size(); std::vector data_ptr_regs; data_ptr_regs.reserve(num_inputs + num_outputs); - for (const auto& param : parameters) + for (const auto& param : parameters) { data_ptr_regs.push_back(param->get_output_port_descriptor(0)->get_reg()); - for (const auto& result : results) + } + for (const auto& result : results) { data_ptr_regs.push_back(result->get_input_port_descriptor(0)->get_reg()); + } std::set unique_buffers; for (const auto& buffer_expr : buffers) { @@ -84,8 +86,9 @@ void jit_kernel_emitter::emit_impl(const std::vector& in, const std::vec std::inserter(available_gpr, available_gpr.begin()), convert); // Note: data_ptr regs are globally live, so it makes no sense to keep them in the pool - for (auto idx : data_ptr_regs_idx) + for (auto idx : data_ptr_regs_idx) { available_gpr.erase({snippets::RegType::gpr, idx}); + } reg_type = snippets::RegType::vec; std::transform(aux_vec_idxs.begin(), aux_vec_idxs.end(), @@ -110,8 +113,9 @@ void jit_kernel_emitter::emit_impl(const std::vector& in, const std::vec } }; std::vector aux_tmp_regs{}; - if (!available_gpr.empty()) + if (!available_gpr.empty()) { aux_tmp_regs.emplace_back(available_gpr.begin()->idx); + } init_data_pointers(utils::transform_idxs_to_regs(in), data_ptr_regs, aux_tmp_regs); for (const auto& expression : *body) { const auto reg_info = expression->get_reg_info(); @@ -123,12 +127,14 @@ void jit_kernel_emitter::emit_impl(const std::vector& in, const std::vec // Note: A few operations are allowed to have mixed register types on 
their inputs (or outputs) => skip // validation here if (!ov::is_type(node) && !ov::is_type(node) && - !std::dynamic_pointer_cast(emitter)) + !std::dynamic_pointer_cast(emitter)) { std::tie(expected_in_type, expected_out_type) = get_expected_reg_types(emitter); + } // Note: live regs = regs live on input of the expression. We also need to exclude output regs from the pool auto live_regs = expression->get_live_regs(); - for (auto r : reg_info.second) + for (auto r : reg_info.second) { live_regs.insert(r); + } std::vector pool_gp_reg; std::vector pool_vec_reg; std::set_difference(available_gpr.begin(), @@ -198,10 +204,11 @@ void jit_kernel_static_emitter::init_data_pointers(const std::vectormov(data_ptr_regs[i], h->ptr[reg_runtime_params + GET_OFF(src_ptrs) + i * sizeof(void*)]); - else + } else { h->mov(data_ptr_regs[i], h->ptr[reg_runtime_params + GET_OFF(dst_ptrs) + (i - num_inputs) * sizeof(void*)]); + } init_ptr_with_offset(data_ptr_regs[i], data_offsets[i], reg_tmp); } // A rare case when num_params is maximal, so we have no spare gprs @@ -236,10 +243,11 @@ void jit_kernel_dynamic_emitter::init_data_pointers(const std::vectormov(data_ptr_regs[num_params + i], h->ptr[reg_runtime_params + GET_OFF(buffer_scratchpad_ptr)]); } for (size_t i = 0; i < num_params; i++) { - if (i < num_inputs) + if (i < num_inputs) { h->mov(data_ptr_regs[i], h->ptr[reg_runtime_params + GET_OFF(src_ptrs) + i * sizeof(void*)]); - else + } else { h->mov(data_ptr_regs[i], h->ptr[reg_runtime_params + GET_OFF(dst_ptrs) + (i - num_inputs) * sizeof(void*)]); + } } } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp index 19eba960b2d79a..9f8f4698d3db17 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp @@ -94,8 +94,9 @@ void jit_loop_begin_emitter::emit_impl(const std::vector& in, const 
std: // If the loop evaulate once, we can skip loop begin code emission // If work_amount is dynamic, we should get runtime `work_amount` - it might be `zero` and we should skip loop // evaluation - if (evaluate_once && !is_work_amount_dynamic) + if (evaluate_once && !is_work_amount_dynamic) { return; + } Reg64 reg_work_amount = Reg64(static_cast(out.back())); if (is_work_amount_dynamic) { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp index 307ef63a8e6a2e..d8332a88a25a4c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_memory_emitters.cpp @@ -73,9 +73,11 @@ size_t jit_memory_emitter::get_parent_buffer_cluster_id(const ov::snippets::lowe size_t jit_memory_emitter::get_consumer_buffer_cluster_id(const ov::snippets::lowered::ExpressionPtr& expr) { OV_CPU_JIT_EMITTER_ASSERT(expr->get_output_port_connectors().size() == 1, "MemoryAccess must have one consumer"); const auto& consumers = expr->get_output_port_connector(0)->get_consumers(); - for (const auto& consumer : consumers) - if (const auto buffer = ov::as_type_ptr(consumer.get_expr())) + for (const auto& consumer : consumers) { + if (const auto buffer = ov::as_type_ptr(consumer.get_expr())) { return buffer->get_cluster_id(); + } + } return SIZE_MAX; } @@ -83,8 +85,9 @@ std::vector jit_memory_emitter::get_available_aux_gprs() const { OV_CPU_JIT_EMITTER_ASSERT(IMPLICATION(is_offset_runtime, !aux_gpr_idxs.empty()), "If offset is dynamic, memory emitter need to have one aux gpr at least!"); auto available_aux_gprs = aux_gpr_idxs; - if (is_offset_runtime) + if (is_offset_runtime) { available_aux_gprs.pop_back(); + } return available_aux_gprs; } @@ -138,11 +141,12 @@ void jit_load_memory_emitter::emit_data() const { jit_load_broadcast_emitter::jit_load_broadcast_emitter(jit_generator* h, cpu_isa_t isa, const 
ExpressionPtr& expr) : jit_memory_emitter(h, isa, expr, emitter_in_out_map::gpr_to_vec) { OV_CPU_JIT_EMITTER_ASSERT(ov::is_type(expr->get_node()), "expects BroadcastLoad node"); - if (src_prc != dst_prc) + if (src_prc != dst_prc) { OV_CPU_JIT_EMITTER_THROW("supports only equal input and output types but gets: ", src_prc.get_type_name(), " and ", dst_prc.get_type_name()); + } } void jit_load_broadcast_emitter::emit_impl(const std::vector& in, const std::vector& out) const { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_snippets_emitters.cpp index ba4012de86d83d..5f9df8ccae6a16 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_snippets_emitters.cpp @@ -36,11 +36,12 @@ jit_result_emitter::jit_result_emitter(jit_generator* h, cpu_isa_t isa, const Ex jit_broadcast_move_emitter::jit_broadcast_move_emitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa) { const auto n = expr->get_node(); - if (n->get_input_element_type(0) != n->get_output_element_type(0)) + if (n->get_input_element_type(0) != n->get_output_element_type(0)) { OV_CPU_JIT_EMITTER_THROW("supports only equal input and output types but gets: ", n->get_input_element_type(0), " and ", n->get_output_element_type(0)); + } byte_size = n->get_input_element_type(0).size(); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp index 58a31a1804782a..7dd2573c32b113 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp @@ -57,8 +57,9 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const std::shared_ptr compiled_kernel = std::make_shared(); // Brgemm is not executable 
- nothing to compile - if (config.is_empty()) + if (config.is_empty()) { return compiled_kernel; + } create_brgemm_kernel(compiled_kernel->brgemm_kernel, config.get_dt_in0(), diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp index 12c52d43b2c4b8..07c448bc83510f 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp @@ -111,8 +111,9 @@ std::shared_ptr BrgemmAMXKernelExecutor::compile_kernel std::shared_ptr compiled_kernel = std::make_shared(); // Brgemm is not executable - nothing to compile - if (config.is_empty()) + if (config.is_empty()) { return compiled_kernel; + } const auto& cache = m_kernel_cache.lock(); OPENVINO_ASSERT(cache, "Invalid kernel cache pointer in BrgemmAMXKernelExecutor::compile_kernel()"); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp index 8b3ed792fce535..4b7d71fd3a879e 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp @@ -139,8 +139,9 @@ float BrgemmBaseKernelExecutor::get_beta(const ov::snippets::lowered::LoopManage while (loop_id >= 0) { const auto& expanded_loop_info = loop_manager->get_loop_info(loop_id); - if (expanded_loop_info->get_unified_loop_info() != current_unified_loop_info) + if (expanded_loop_info->get_unified_loop_info() != current_unified_loop_info) { return 0; + } if (expanded_loop_info->get_work_amount() > 0) { // there is previous executed Brgemm with `beta = 0` -> the current Brgemm should have `beta = 1` return 1; @@ -244,8 +245,9 @@ void BrgemmBaseKernelExecutor::update_config(const ov::snippets::lowered::Expres K = 
current_expanded_loop_info->get_work_amount() > 0 ? current_expanded_loop_info->get_increment() : 0; input_pds[0]->set_subtensor_dim(0, K); input_pds[1]->set_subtensor_dim(1, K); - if (K > 0) + if (K > 0) { beta = get_beta(loop_manager, static_cast(loop_ids.back()), current_expanded_loop_info); + } } const auto LDA = DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(0))); @@ -255,8 +257,9 @@ void BrgemmBaseKernelExecutor::update_config(const ov::snippets::lowered::Expres const auto& brgemm_node = as_type_ptr(expr->get_node()); OV_CPU_JIT_EMITTER_ASSERT(brgemm_node, "Got invalid node type in update_config"); // In case of data repacking LDB is chosen in accordance with repacking buffer size - if (with_repacking(brgemm_node->get_type())) + if (with_repacking(brgemm_node->get_type())) { LDB = DIM_CAST(brgemm_utils::repacking::compute_LDB(LDB, brgemm_node->get_input_element_type(1))); + } config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), LDA, LDB, LDC, beta); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp index 30d95ed6a2bf7a..54d4ffaa433944 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp @@ -219,8 +219,9 @@ void BrgemmCopyBKernel::generate() { mov(src_reg, ptr[abi_param1 + GET_OFF_BRGEMM_COPY_B_ARGS(src)]); mov(tr_src_reg, ptr[abi_param1 + GET_OFF_BRGEMM_COPY_B_ARGS(tr_src)]); - if (is_with_comp) + if (is_with_comp) { mov(comp_reg, ptr[abi_param1 + GET_OFF_BRGEMM_COPY_B_ARGS(compensation_ptr)]); + } size_t start_in = 0; size_t start_out = 0; @@ -255,8 +256,9 @@ void BrgemmCopyBKernel::emit_brgemm_copy_b_kernel_call(size_t N, spill.preamble(); const auto add_offset = [&](Xbyak::Reg64 reg, size_t bytes_offset) { - if (bytes_offset) + if (bytes_offset) { add(reg, bytes_offset); + } }; // 
save function address in gpr to pass in call instruction @@ -267,10 +269,11 @@ void BrgemmCopyBKernel::emit_brgemm_copy_b_kernel_call(size_t N, add_offset(src_reg, offset_in); // abi_param2 add_offset(tr_src_reg, offset_out); // abi_param3 - if (is_with_comp) // abi_param4 + if (is_with_comp) { // abi_param4 add_offset(comp_reg, offset_comp); - else + } else { mov(comp_reg, reinterpret_cast(nullptr)); + } #ifdef _WIN32 // Note: ABI requires that the remaining parameters (except the first for) are pushed to the stack in right-to-left diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/utils.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/utils.cpp index ce5096211cc945..3e7e570bc0ace8 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/utils.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/utils.cpp @@ -31,9 +31,11 @@ size_t get_buffer_cluster_id(const ov::snippets::lowered::ExpressionPort& port) break; case ov::snippets::lowered::ExpressionPort::Type::Output: offset = ma_op->get_output_offset(port.get_index()); - for (const auto& child : port.get_connected_ports()) - if (!ov::is_type(child.get_expr()->get_node())) + for (const auto& child : port.get_connected_ports()) { + if (!ov::is_type(child.get_expr()->get_node())) { id = get_cluster_id(child); + } + } break; default: OV_CPU_JIT_EMITTER_THROW("Uknown type of expression port!"); @@ -50,10 +52,12 @@ Xbyak::Reg64 get_aux_gpr(const std::vector& used_gpr_idxs) { static_cast(abi_param2.getIdx())}; for (size_t gpr_idx = 0; gpr_idx <= Xbyak::Operand::R15; ++gpr_idx) { size_t _idx = Xbyak::Operand::R15 - gpr_idx; // we allocate from the end - if (std::find(used_gpr_idxs.cbegin(), used_gpr_idxs.cend(), _idx) != used_gpr_idxs.cend()) + if (std::find(used_gpr_idxs.cbegin(), used_gpr_idxs.cend(), _idx) != used_gpr_idxs.cend()) { continue; - if (blacklist_gpr_idxs.count(_idx) > 0) + } + if (blacklist_gpr_idxs.count(_idx) > 0) { continue; + } return Xbyak::Reg64(_idx); } 
OV_CPU_JIT_EMITTER_THROW("Failed to allocate aux GPR"); @@ -87,8 +91,9 @@ void push_ptr_with_static_offset_on_stack(dnnl::impl::cpu::x64::jit_generator* h size_t ptr_offset) { const auto stack_frame = h->qword[h->rsp + stack_offset]; h->mov(stack_frame, ptr_reg); - if (ptr_offset != 0) + if (ptr_offset != 0) { h->add(stack_frame, ptr_offset); + } } } // namespace utils diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_equation_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_equation_emitter.cpp index 365acc366840e0..cd636d4d05dbdc 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_equation_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_equation_emitter.cpp @@ -114,8 +114,9 @@ void EquationTppEmitter::emit_impl(const std::vector& in, const std::vec const auto addr = h->rsp + i * sizeof(void*); h->mov(h->qword[addr], Reg64(static_cast(reg_idx))); const auto bytes_offset = io_offsets[i]; - if (bytes_offset) + if (bytes_offset) { h->add(h->qword[addr], bytes_offset); + } } const auto& compiled_kernel = get_compiled_kernel_ptr(); @@ -138,8 +139,9 @@ void EquationTppEmitter::emit_impl(const std::vector& in, const std::vec void EquationTppEmitter::execute_kernel(libxsmm_meqn_function equation_kernel, int argc, void** argv) { std::vector inputs(argc - 1); - for (int i = 0; i < argc - 1; i++) + for (int i = 0; i < argc - 1; i++) { inputs[i].primary = argv[i]; + } libxsmm_meqn_param param; param.ops_args = nullptr; param.inputs = inputs.data(); diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp index a18b1616bb517c..04f3b70c98b10b 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp @@ -19,10 +19,12 @@ VectorDims TppEmitter::get_projected_subtensor(const snippets::lowered::PortDesc auto shape = desc->get_shape(); auto subtensor = 
desc->get_subtensor(); // Note: Scalar is a special case, so it's easier to prepend shapes than to handle it explicitly - if (shape.size() == 1) + if (shape.size() == 1) { shape.insert(shape.begin(), 1); - if (subtensor.size() == 1) + } + if (subtensor.size() == 1) { subtensor.insert(subtensor.begin(), 1); + } OV_CPU_JIT_EMITTER_ASSERT(subtensor.size() <= shape.size() && !subtensor.empty(), "Invalid subtensor + shape combination"); auto shape_it = shape.rbegin(); @@ -50,8 +52,9 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, io_port_descriptors.resize(num_kernel_args); // Note: this is needed mostly for Reduce operations, since they allow the last subternsor dim to be FULL_DIM; auto replace_full_dim = [](size_t dim, size_t replace_dim) { - if (ov::snippets::utils::is_full_dim_value(dim)) + if (ov::snippets::utils::is_full_dim_value(dim)) { return replace_dim; + } return dim; }; @@ -102,8 +105,9 @@ void TppEmitter::emit_impl(const std::vector& in, const std::vectoruni_vmovq(reg, xmm); - if (bytes_offset) + if (bytes_offset) { h->add(reg, bytes_offset); + } }; const auto& compiled_kernel = get_compiled_kernel_ptr(); OV_CPU_JIT_EMITTER_ASSERT(compiled_kernel, "Failed to compile libxsmm_kernel"); diff --git a/src/plugins/intel_cpu/src/emitters/utils.cpp b/src/plugins/intel_cpu/src/emitters/utils.cpp index 81066b9ed48ccb..ba726d739385b7 100644 --- a/src/plugins/intel_cpu/src/emitters/utils.cpp +++ b/src/plugins/intel_cpu/src/emitters/utils.cpp @@ -26,10 +26,12 @@ std::string jit_emitter_pretty_name(const std::string& pretty_func) { size_t counter = 1; while (counter != 0 && parenthesis > 0) { parenthesis--; - if (pretty_func[parenthesis] == '>') + if (pretty_func[parenthesis] == '>') { counter++; - if (pretty_func[parenthesis] == '<') + } + if (pretty_func[parenthesis] == '<') { counter--; + } } } SAFE_SYMBOL_FINDING(end, pretty_func.substr(0, parenthesis).rfind("::")) diff --git a/src/plugins/intel_cpu/src/graph.cpp 
b/src/plugins/intel_cpu/src/graph.cpp index 01371d64b779a0..02e05156883534 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -72,8 +72,9 @@ void Graph::CreateGraph(const std::vector& graphNodes, const std::vector& graphEdges, const GraphContext::CPtr& context, std::string name) { - if (IsReady()) + if (IsReady()) { ForgetGraphData(); + } m_context = context; m_stream = dnnl::stream(getEngine()); @@ -211,8 +212,9 @@ void Graph::Replicate(const std::shared_ptr& model, const auto& childEdges = node->getChildEdges(); return std::any_of(childEdges.begin(), childEdges.end(), [](const EdgeWeakPtr& edge) -> bool { auto edgePtr = edge.lock(); - if (!edgePtr) + if (!edgePtr) { return false; + } return edgePtr->getChild()->getType() == Type::Subgraph; }); }; @@ -230,8 +232,9 @@ void Graph::Replicate(const std::shared_ptr& model, const auto child_prec = child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()); if (!one_of(child_prec, ov::element::bf16, ov::element::f16) && // remove this WA when #78939 is resolved - !hasSubgraphConsumers(child)) + !hasSubgraphConsumers(child)) { child->setOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum(), precToSet); + } } } @@ -255,8 +258,9 @@ static std::vector IdentifySyncPoints(const std::vector& graphN for (size_t i = 0; i < graphNodes.size(); ++i) { const auto& node = graphNodes[i]; - if (!node->isDynamicNode()) + if (!node->isDynamicNode()) { continue; + } if (node->outputShapeDataDependency() || // WA: for convolution plus sum(broadcast). 
Due to the fact that a convolution with sum use the same memory @@ -316,8 +320,9 @@ void Graph::Init(const std::shared_ptr& model, const GraphContext::CPtr& context, const std::vector& inputConfigs, const std::vector& outputConfigs) { - if (IsReady()) + if (IsReady()) { ForgetGraphData(); + } m_context = context; m_stream = dnnl::stream(getEngine()); @@ -534,8 +539,9 @@ void Graph::CreatePrimitivesAndExecConstants() const { if (edgePtr->isUseExternalMemory()) { auto ptr = m_context->getWeightsCache()->get(edgePtr->hash()); outputs.emplace_back(ptr); - if (!ptr->isValid()) + if (!ptr->isValid()) { hasExternalInvalidEdges = true; + } } else { hasLocalAllocatedEdges = true; } @@ -562,8 +568,9 @@ void Graph::CreatePrimitivesAndExecConstants() const { if (std::get<0>(sharedOutputs) || std::get<1>(sharedOutputs)) { ExecuteNodeWithCatch(node); - for (auto& output : std::get<2>(sharedOutputs)) + for (auto& output : std::get<2>(sharedOutputs)) { output->valid(true); + } } } else { ExecuteNodeWithCatch(node); @@ -705,8 +712,9 @@ void Graph::ResolveComplexInplaceConflicts() { if (auto modifyingNode = edge->modifiedInPlace()) { auto execIndex = modifyingNode->getExecIndex(); for (const auto& pEdgePeer : portChildEdges) { - if (pEdgePeer == edge) + if (pEdgePeer == edge) { continue; + } std::vector vecConsumers; pEdgePeer->collectConsumers(vecConsumers); @@ -925,8 +933,9 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { // TODO: WA for some test (like strided_slice_test) which use tensors with // shapes {0}. And it is implicitly converted into {1} tensor. // Zeroing of input data allow pass tests. - if (edge->getParent()->type == Type::Input && edge->hasDefinedMaxSize()) + if (edge->getParent()->type == Type::Input && edge->hasDefinedMaxSize()) { edge->getMemoryPtr()->nullify(); + } count++; } @@ -986,15 +995,17 @@ void Graph::Allocate(const std::vector& syncNodesInds) { // resolve edges. 
Define which will be a view on others // NeedAllocation - real blob // NotAllocated - view on other blob, peer or in-place - for (auto& edge : graphEdges) + for (auto& edge : graphEdges) { edge->init(); + } // Allocate memory space for all edges marked with NeedAllocation AllocateWithReuse(syncNodesInds); // Check all getters. Should work. - for (auto& edge : graphEdges) + for (auto& edge : graphEdges) { edge->validate(); + } } bool Graph::ProcessDynNodes() { @@ -1008,8 +1019,9 @@ bool Graph::ProcessDynNodes() { } void Graph::PushInputData(const std::size_t& index, const ov::SoPtr& input) { - if (!IsReady()) + if (!IsReady()) { OPENVINO_THROW("Wrong state. Topology not ready."); + } auto input_itr = inputNodesMap.find(index); if (input_itr != inputNodesMap.end()) { auto node = input_itr->second; @@ -1041,8 +1053,9 @@ void Graph::PushInputData(const std::size_t& index, const ov::SoPtr& in // suppose always being shared infer_request intel_cpu::Tensor to Graph if isDynamic. void Graph::PullOutputData(std::unordered_map>& output) { - if (!IsReady()) + if (!IsReady()) { OPENVINO_THROW("Wrong state. 
Topology not ready."); + } for (auto& outputMap : outputNodesMap) { auto output_index = outputMap.first; @@ -1065,9 +1078,10 @@ void Graph::PullOutputData(std::unordered_map>& bool isScalarOutput = false; if (ext_blob->get_shape().empty() && ext_blob->get_size() == 1) { const auto& actualDims = expected_desc_ptr->getShape().getStaticDims(); - isScalarOutput = - !actualDims.empty() && - std::accumulate(actualDims.begin(), actualDims.end(), (size_t)1, std::multiplies()) == 1; + isScalarOutput = !actualDims.empty() && std::accumulate(actualDims.begin(), + actualDims.end(), + static_cast(1), + std::multiplies()) == 1; } auto outDims = intr_blob.getStaticDims(); @@ -1105,12 +1119,13 @@ void Graph::PullOutputData(std::unordered_map>& auto srcPrec = actualDesc->getPrecision(); auto dstPrec = expected_desc_ptr->getPrecision(); - if (srcPrec == dstPrec && ext_blob->get_byte_size() != intr_blob.getSize()) + if (srcPrec == dstPrec && ext_blob->get_byte_size() != intr_blob.getSize()) { OPENVINO_THROW("Output tensor byte size is not equal model output byte size (", ext_blob->get_byte_size(), "!=", intr_blob.getSize(), ")."); + } void* ext_blob_ptr = ext_blob->data(); void* intr_blob_ptr = intr_blob.getData(); @@ -1126,8 +1141,9 @@ void Graph::PullOutputData(std::unordered_map>& "\r\n"); // That is the same memory. 
No need to copy - if (ext_blob_ptr == intr_blob_ptr) + if (ext_blob_ptr == intr_blob_ptr) { continue; + } if (actualDesc->getPrecision() == element::string) { StringMemory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr); @@ -1416,8 +1432,9 @@ class UpdateNodes : public UpdateNodesBase { DEBUG_LOG(*node); inline void Graph::ExecuteNode(const NodePtr& node, SyncInferRequest* request, int numaId) const { - if (request) + if (request) { request->throw_if_canceled(); + } node->execute(m_stream, numaId); } @@ -1487,8 +1504,9 @@ void Graph::Infer(SyncInferRequest* request) { static_cast(status)); } - if (infer_count != -1) + if (infer_count != -1) { infer_count++; + } } void Graph::SortTopologically() { @@ -1507,8 +1525,9 @@ void Graph::SortTopologically() { std::function visit; visit = [&execIndexCnt, &sorted, &visit](const NodePtr& node) { - if (node->execIndex >= 0) + if (node->execIndex >= 0) { return; // already visited + } for (size_t i = 0; i < node->getParentEdges().size(); i++) { visit(node->getParentEdgeAt(i)->getParent()); @@ -1549,10 +1568,11 @@ void Graph::SortTopologically() { for (size_t i = 0; i < node->childEdges.size(); i++) { auto edge = node->getChildEdgeAt(i); int port = edge->getInputNum(); - if (port < port_num && !res[port]) + if (port < port_num && !res[port]) { res[port] = edge; - else + } else { res.push_back(edge); + } } node->childEdges = {res.begin(), res.end()}; } @@ -1580,8 +1600,9 @@ void Graph::GetPerfData(std::vector& perfMap) const { }; for (size_t i = 0; i < graphNodes.size(); i++) { - if (graphNodes[i]->isConstant()) + if (graphNodes[i]->isConstant()) { continue; + } getPerfMapFor(perfMap, graphNodes[i]); } } @@ -1616,22 +1637,26 @@ void Graph::DropNode(const NodePtr& node) { for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if (!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } const int inNum = p_edge->getInputNum(); 
RemoveEdge(p_edge); for (size_t j = 0; j < children.size(); j++) { auto c_edge = children[j].lock(); - if (!c_edge) + if (!c_edge) { continue; + } auto child = c_edge->getChild(); - if (!child) + if (!child) { continue; + } const int outNum = c_edge->getOutputNum(); RemoveEdge(c_edge); @@ -1645,32 +1670,38 @@ void Graph::DropDWConvNode(const NodePtr& node) { auto parents = node->parentEdges; auto parentConvEdge = parents[0].lock(); - if (!parentConvEdge) + if (!parentConvEdge) { return; + } auto parentConv = parentConvEdge->getParent(); - if (!parentConv) + if (!parentConv) { return; + } parentConv->outputShapes[0] = node->outputShapes[0]; for (size_t i = 0; i < 1; i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if (!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } const int inNum = p_edge->getInputNum(); RemoveEdge(p_edge); for (size_t j = 0; j < children.size(); j++) { auto c_edge = children[j].lock(); - if (!c_edge) + if (!c_edge) { continue; + } auto child = c_edge->getChild(); - if (!child) + if (!child) { continue; + } const int outNum = c_edge->getOutputNum(); RemoveEdge(c_edge); @@ -1680,11 +1711,13 @@ void Graph::DropDWConvNode(const NodePtr& node) { for (size_t i = 1; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if (!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } const int inNum = p_edge->getInputNum(); const int portCandidate = p_edge->getOutputNum(); @@ -1753,7 +1786,7 @@ NodePtr Graph::InsertReorder(const EdgePtr& edge, bool Graph::InsertNode(const EdgePtr& edge, const NodePtr& node, bool initNode) { auto oIndex = edge->getOutputNum(); auto iIndex = edge->getInputNum(); - if (iIndex < 0 || oIndex < 0) + if (iIndex < 0 || oIndex < 0) { OPENVINO_THROW("Cannot insert node '", node->getName(), "' between nodes: ", @@ -1761,6 +1794,7 @@ bool Graph::InsertNode(const EdgePtr& edge, const NodePtr& node, bool 
initNode) " and ", edge->getChild()->getName(), "."); + } edge->getParent()->removeChildEdge(edge); edge->getChild()->removeParentEdge(edge); @@ -1794,8 +1828,9 @@ void Graph::EnforceInferencePrecision() { const auto inferPrec = getConfig().inferencePrecision; - if (one_of(inferPrec, element::f32, element::undefined, ov::element::f16)) + if (one_of(inferPrec, element::f32, element::undefined, ov::element::f16)) { return; // nothing to do, only precision reduction is currently allowed + } #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) if (inferPrec == ov::element::f16) return; // precision of configured by ov::pass::ConvertPrecision @@ -1817,8 +1852,9 @@ void Graph::EnforceInferencePrecision() { Type::Interpolate, // super resolution nets Type::PagedAttention, // page attention Type::QKVProjection, - Type::LLMMLP)) + Type::LLMMLP)) { continue; // stop at significant nodes + } } else if (inferPrec == ov::element::f16) { /* list of node types that must be forced to be executed in FP16 precision * because of performance gains */ @@ -1828,14 +1864,16 @@ void Graph::EnforceInferencePrecision() { Type::FullyConnected, // conv / bert nets Type::MatMul, // bert nets Type::Pooling, - Type::MVN)) + Type::MVN)) { continue; // stop at significant nodes + } } const auto res = skipNodes.insert(parent); - if (res.second) // node not visited yet + if (res.second) { // node not visited yet searchForNodesToSkip(parent, skipNodes); + } } }; @@ -1847,20 +1885,24 @@ void Graph::EnforceInferencePrecision() { for (const auto& entry : outputNodesMap) { const auto& output = entry.second; // do not skip outputs which precisions are explicitly set equal to inferPrec - if (output->getOriginalInputPrecisionAtPort(0) == inferPrec) + if (output->getOriginalInputPrecisionAtPort(0) == inferPrec) { continue; + } searchForNodesToSkip(output, nodesToSkip); } for (const auto& node : graphNodes) { - if (nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail) + if 
(nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail) { continue; + } - if (one_of(node->getType(), Type::Input, Type::Output, Type::MemoryInput, Type::MemoryOutput)) + if (one_of(node->getType(), Type::Input, Type::Output, Type::MemoryInput, Type::MemoryOutput)) { continue; - if (node->keepOrigPrecision()) + } + if (node->keepOrigPrecision()) { continue; + } #ifdef CPU_DEBUG_CAPS if (!inferPrecDebug.enabled(NameFromType(node->getType()), node->getName(), node->getOriginalLayers())) continue; @@ -1869,23 +1911,27 @@ void Graph::EnforceInferencePrecision() { for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) { auto keepOriginalInputPrecisionAtPort = [](const NodePtr& node, const size_t inPort) { // keep non-float32 precisions - if (node->getOriginalInputPrecisionAtPort(inPort) != ov::element::f32) + if (node->getOriginalInputPrecisionAtPort(inPort) != ov::element::f32) { return true; + } // kvcache of PagedAttention should be written directly - if (node->getType() == Type::PagedAttention && (inPort == 3 || inPort == 4)) + if (node->getType() == Type::PagedAttention && (inPort == 3 || inPort == 4)) { return true; + } const auto& parent = node->getParentEdgeAt(inPort)->getParent(); /* Skip BF16 enforcement for nodes after Constant Inputs for maintaining precision for fusing. 
* Element type conversion to bf16 is done automatically, if convolution follows up after Constant * Inputs and activation is bf16 */ if (parent->getType() == Type::Input && parent->isConstant() && // Concatenation node is exception because it doesn't change an accuracy for BF16 activation - node->getType() != Type::Concatenation) + node->getType() != Type::Concatenation) { return true; + } // Eltwise and Subgraph (snippets) nodes support precision conversion - if (parent->getType() == Type::Input && one_of(node->getType(), Type::Eltwise, Type::Subgraph)) + if (parent->getType() == Type::Input && one_of(node->getType(), Type::Eltwise, Type::Subgraph)) { return true; + } // exclude Convert after Range since it may cause precision loss when integter type to LP. if (parent->getType() == Type::Range && node->getType() == Type::Convert) { @@ -1895,8 +1941,9 @@ void Graph::EnforceInferencePrecision() { return false; }; - if (keepOriginalInputPrecisionAtPort(node, i)) + if (keepOriginalInputPrecisionAtPort(node, i)) { continue; + } DEBUG_LOG("#", node->getExecIndex(), @@ -1913,17 +1960,20 @@ void Graph::EnforceInferencePrecision() { for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) { // keep non-float32 precisions - if (node->getOriginalOutputPrecisionAtPort(i) != ov::element::f32) + if (node->getOriginalOutputPrecisionAtPort(i) != ov::element::f32) { continue; + } // exclude Convert before Range since it may cause precision loss when integter type to LP. // TODO: Incorrect subgraph is generated by ONNX FE + ticket 117861. 
const auto& child = node->getChildEdgeAt(i)->getChild(); - if (child->getType() == Type::Range && node->getType() == Type::Convert) + if (child->getType() == Type::Range && node->getType() == Type::Convert) { continue; + } // skip second output of PagedAttention - if (node->getType() == Type::PagedAttention && (i != 0)) + if (node->getType() == Type::PagedAttention && (i != 0)) { continue; + } DEBUG_LOG("#", node->getExecIndex(), diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp index e7dd513fa2f790..a81acfafee37bf 100644 --- a/src/plugins/intel_cpu/src/graph_context.cpp +++ b/src/plugins/intel_cpu/src/graph_context.cpp @@ -30,8 +30,9 @@ GraphContext::GraphContext(Config config, m_cpuStreamExecutor = std::dynamic_pointer_cast(streamExecutor); m_numaNodeId = m_cpuStreamExecutor ? m_cpuStreamExecutor->get_numa_node_id() : 0; auto nNumaNodes = get_num_numa_nodes(); - if (m_numNumaNodes < nNumaNodes) + if (m_numNumaNodes < nNumaNodes) { m_numNumaNodes = nNumaNodes; + } } // primitive/executors can be shared across sub-stream // but scratch pad cannot be shared. 
diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index 73c99a52156ad6..ede91d4d659477 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -56,9 +56,11 @@ std::map extract_node_metadata(const NodePtr& node) { // If all output precisions are the same, we store the name only once if (!isAllEqual) { - for (size_t i = 1; i < node->getChildEdges().size(); i++) + for (size_t i = 1; i < node->getChildEdges().size(); i++) { outputPrecisionsStr += - "," + std::string(node->getChildEdgeAt(i)->getMemory().getDesc().getPrecision().get_type_name()); + "," + static_cast( + node->getChildEdgeAt(i)->getMemory().getDesc().getPrecision().get_type_name()); + } } } else { // Branch to correctly handle output nodes @@ -189,8 +191,9 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph& graph) { to_hold.push_back(return_node); } - for (auto&& kvp : meta_data) + for (auto&& kvp : meta_data) { return_node->get_rt_info()[kvp.first] = kvp.second; + } return_node->set_friendly_name(node->getName()); return return_node; @@ -203,10 +206,12 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph& graph) { node2layer[node] = nodes.back(); } - for (auto&& kvp : paramsMap) + for (auto&& kvp : paramsMap) { params.push_back(kvp.second); - for (auto&& kvp : resultsMap) + } + for (auto&& kvp : resultsMap) { results.push_back(kvp.second); + } auto holder = !results.empty() ? 
results[0] : std::make_shared(); for (auto& node : to_hold) { diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 95ba27f3fa0828..635ab736b6bd2c 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -215,13 +215,16 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph& graph) { auto parentNode = node->getParentEdgeAt(0)->getParent(); auto scaleNode = node->getParentEdgeAt(1)->getParent(); if (!(parentNode->getType() == Type::Convolution || parentNode->getType() == Type::MatMul || - parentNode->getType() == Type::Deconvolution)) + parentNode->getType() == Type::Deconvolution)) { return false; - if (!scaleNode->isConstant()) + } + if (!scaleNode->isConstant()) { return false; + } // Only Fusing scales for INT8 precision. - if (!parentNode->canBeExecutedInInt8()) + if (!parentNode->canBeExecutedInInt8()) { return false; + } return (parentNode->getParentEdges().size() == 2); }; @@ -230,37 +233,45 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph& graph) { const auto channelAxis = node->getFusingAxis(); auto OC = nodeOutDims[channelAxis]; - if (Shape::UNDEFINED_DIM == OC) + if (Shape::UNDEFINED_DIM == OC) { return false; - if (!node->getFusedWith().empty() || !scales->getFusedWith().empty()) + } + if (!node->getFusedWith().empty() || !scales->getFusedWith().empty()) { return false; + } const auto scalesDims = getNormalizedDimsBySize(scales->getOutputShapeAtPort(0).getDims(), nodeOutDims.size()); - if (nodeOutDims.size() != scalesDims.size() || scalesDims.size() < 2) + if (nodeOutDims.size() != scalesDims.size() || scalesDims.size() < 2) { return false; + } - if (!dimsEqualStrong(scalesDims[channelAxis], nodeOutDims[channelAxis]) && scalesDims[channelAxis] != 1) + if (!dimsEqualStrong(scalesDims[channelAxis], nodeOutDims[channelAxis]) && scalesDims[channelAxis] != 1) { return false; + } for (size_t i = 0; i < scalesDims.size(); 
i++) { - if (scalesDims[i] != 1 && static_cast(i) != channelAxis) + if (scalesDims[i] != 1 && static_cast(i) != channelAxis) { return false; + } } return true; }; auto initializeDeQuantizedScales = [](const NodePtr& node, const NodePtr& scales) { auto scalesConstant = dynamic_cast(scales.get()); - if (scalesConstant == nullptr) + if (scalesConstant == nullptr) { OPENVINO_THROW("Cannot cast to Input node"); + } auto scalesBlob = scalesConstant->getMemoryPtr(); - if (scalesBlob == nullptr) + if (scalesBlob == nullptr) { OPENVINO_THROW("Cannot cast to TBlob internal scales blob"); + } auto scalesData = static_cast(scalesBlob->getData()); - if (scalesData == nullptr) + if (scalesData == nullptr) { OPENVINO_THROW("scalesBlob has not allocated buffer"); + } auto scalesDims = getNormalizedDimsBySize(scales->getOutputShapeAtPort(0).getDims(), node->getOutputShapeAtPort(0).getDims().size()); auto scaleSize = std::accumulate(scalesDims.begin(), scalesDims.end(), 1, std::multiplies()); @@ -270,15 +281,17 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph& graph) { for (size_t i = 0; i < graphNodes.size(); i++) { auto mul = graphNodes[i]; - if (!isDQScaleGraphPattern(mul)) + if (!isDQScaleGraphPattern(mul)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FuseConvMatmulFCDeconvAndDQScales); auto node = mul->getParentEdgeAt(0)->getParent(); auto scales = mul->getParentEdgeAt(1)->getParent(); - if (!scaleDimsCheck(node, scales)) + if (!scaleDimsCheck(node, scales)) { continue; + } if (initializeDeQuantizedScales(node, scales)) { DEBUG_LOG("GraphOptimizer##FusingDQ: Node ##", @@ -299,27 +312,32 @@ void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph& graph) { auto isSuitableParentNode = [](const NodePtr& node) { const auto deconv = std::dynamic_pointer_cast(node); // bias should be the first child - if (!node->getFusedWith().empty()) + if (!node->getFusedWith().empty()) { return false; + } // no other child other than bias-add - if (node->getChildEdges().size() != 
1) + if (node->getChildEdges().size() != 1) { return false; + } - if (!deconv) + if (!deconv) { return (one_of(node->getType(), Type::Convolution, Type::MatMul) && node->getParentEdges().size() == 2); - else + } else { return deconv->canFuseBias(); + } }; auto isSuitableChildNode = [&](const NodePtr& parentNode, const NodePtr& childNode) { if (childNode->getAlgorithm() != Algorithm::EltwiseAdd || !childNode->getFusedWith().empty() || - childNode->getParentEdges().size() != 2) + childNode->getParentEdges().size() != 2) { return false; + } auto biasPort = childNode->getParentEdgeAt(0)->getParent() == parentNode ? 1 : 0; const auto biasNode = childNode->getParentEdgeAt(biasPort)->getParent(); - if (biasNode->getType() != Type::Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) + if (biasNode->getType() != Type::Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) { return false; + } const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims(); const auto biasDims = @@ -327,16 +345,19 @@ void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph& graph) { // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and // per-channel cases. Most of the real models contain per-channel bias, so we need to reavaluate the need to // support per-tensor variant. 
- if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2) + if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2) { return false; + } const auto channelAxis = parentNode->getFusingAxis(); - if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis])) + if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis])) { return false; + } for (size_t i = 0; i < biasDims.size(); i++) { - if (biasDims[i] != 1 && static_cast(i) != channelAxis) + if (biasDims[i] != 1 && static_cast(i) != channelAxis) { return false; + } } return true; @@ -362,19 +383,23 @@ void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph& graph) { for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if (!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } if (parent == parentNode) { for (size_t j = 0; j < childs.size(); j++) { - if (!childs[j].lock()) + if (!childs[j].lock()) { continue; + } auto child = childs[j].lock()->getChild(); - if (!child) + if (!child) { continue; + } EdgePtr& remEdge = p_edge; int inNum = 0; @@ -459,11 +484,13 @@ void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph& graph) { auto& graphNodes = graph.GetNodes(); auto isSuitableParentNode = [](const NodePtr& node) { - if (node->getType() != Type::Deconvolution || node->getChildEdges().size() != 1) + if (node->getType() != Type::Deconvolution || node->getChildEdges().size() != 1) { return false; + } const auto deconv = std::dynamic_pointer_cast(node); - if (deconv == nullptr) + if (deconv == nullptr) { OPENVINO_THROW("Cannot cast to deconvolution node ", node->getName()); + } if (deconv->getAlgorithm() != Algorithm::DeconvolutionCommon) { return true; @@ -473,10 +500,12 @@ void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph& graph) { const auto& kernel = deconv->getWeightDims(); // WA oneDNN doesn't support fusing post ops after deconvolution with strides 
over kernel size bool isSupportedParams = strides[strides.size() - 1] <= static_cast(kernel[kernel.size() - 1]); - if (strides.size() > 1) + if (strides.size() > 1) { isSupportedParams &= strides[strides.size() - 2] <= static_cast(kernel[kernel.size() - 2]); - if (strides.size() > 2) + } + if (strides.size() > 2) { isSupportedParams &= strides[strides.size() - 3] <= static_cast(kernel[kernel.size() - 3]); + } return isSupportedParams; }; @@ -503,8 +532,9 @@ void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::Deconvolution) + if (p_edge->getParent()->getType() == Type::Deconvolution) { continue; + } graph.RemoveEdge(p_edge); } @@ -517,47 +547,54 @@ void GraphOptimizer::FuseMultiplyAndAdd(Graph& graph) { auto& graphNodes = graph.GetNodes(); auto isSuitableSecondInput = [](const NodePtr& node, VectorDims dataDims) { - if (node->getType() != Type::Input || !node->isConstant()) + if (node->getType() != Type::Input || !node->isConstant()) { return false; + } const auto secondInputDims = node->getOutputShapeAtPort(0).getStaticDims(); - if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) + if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) { return false; + } auto getChannelAxis = [](const VectorDims& dims) { auto channelAxis = -1; for (size_t i = 0; i < dims.size(); i++) { if (dims[i] != 1) { - if (channelAxis != -1) // more than one axis is != 1 + if (channelAxis != -1) { // more than one axis is != 1 return -1; - else + } else { channelAxis = i; + } } } return channelAxis; }; const auto channelAxis = getChannelAxis(secondInputDims); - if (channelAxis == -1) + if (channelAxis == -1) { return false; + } - if (secondInputDims[0] != 1 || !dimsEqualWeak(secondInputDims[channelAxis], dataDims[channelAxis])) + if (secondInputDims[0] != 1 || 
!dimsEqualWeak(secondInputDims[channelAxis], dataDims[channelAxis])) { return false; + } return true; }; auto isSuitableParentNode = [&](const NodePtr& node) { if (node->getAlgorithm() != Algorithm::EltwiseMultiply || !node->getFusedWith().empty() || - node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) + node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) { return false; + } return isSuitableSecondInput(node->getParentEdgeAt(1)->getParent(), node->getInputShapeAtPort(0).getDims()); }; auto isSuitableChildNode = [&](const NodePtr& parentNode, const NodePtr& childNode) { if (childNode->getAlgorithm() != Algorithm::EltwiseAdd || !childNode->getFusedWith().empty() || - childNode->getParentEdges().size() != 2) + childNode->getParentEdges().size() != 2) { return false; + } return isSuitableSecondInput(childNode->getParentEdgeAt(1)->getParent(), childNode->getInputShapeAtPort(0).getDims()) && @@ -587,19 +624,23 @@ void GraphOptimizer::FuseMultiplyAndAdd(Graph& graph) { for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if (!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } if (parent == parentNode) { for (size_t j = 0; j < childs.size(); j++) { - if (!childs[j].lock()) + if (!childs[j].lock()) { continue; + } auto child = childs[j].lock()->getChild(); - if (!child) + if (!child) { continue; + } EdgePtr& remEdge = p_edge; int inNum = 0; @@ -672,17 +713,21 @@ void GraphOptimizer::MergeConvertAndScaleShift(Graph& graph) { const auto parents = parentNode->parentEdges; for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if (!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } - if (!parentNode->childEdges[0].lock()) + if (!parentNode->childEdges[0].lock()) { continue; + } auto child = parentNode->childEdges[0].lock()->getChild(); - if 
(!child) + if (!child) { continue; + } EdgePtr& remEdge = p_edge; int inNum = 0; @@ -731,16 +776,18 @@ void GraphOptimizer::FuseFCAndConvertOnWeights(Graph& graph) { NodePtr transpose = nullptr; auto parent = fullyConnected->getParentEdgeAt(1)->getParent(); if (parent->getType() == Type::Transpose) { - if (!isSuitableTranspose(parent)) + if (!isSuitableTranspose(parent)) { continue; + } transpose = parent; parent = transpose->getParentEdgeAt(0)->getParent(); } const auto convert = parent; - if (!isSuitableConvert(convert)) + if (!isSuitableConvert(convert)) { continue; + } const auto weights = convert->getParentEdgeAt(0)->getParent(); const auto weights_out_edge = weights->getChildEdges()[0].lock(); @@ -807,18 +854,22 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) { auto initializeInputZeroPoints = [](const NodePtr& node, const NodePtr& parent0, const NodePtr& parent1) { auto* convNode = dynamic_cast(node.get()); - if (convNode == nullptr) + if (convNode == nullptr) { OPENVINO_THROW("Cannot get convolution node ", node->getName()); + } auto IC = node->getInputShapeAtPort(0).getDims()[1]; auto OC = node->getOutputShapeAtPort(0).getDims()[1]; - if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) + if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) { return false; - if (parent0->getType() != Type::Eltwise) + } + if (parent0->getType() != Type::Eltwise) { return false; - if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) + } + if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) { return false; + } // The plug-in doesn't support FP32 convolution with input/weights zero points. 
// In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) @@ -828,49 +879,59 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) { return false; } - if (parent0->getAlgorithm() != Algorithm::EltwiseSubtract) + if (parent0->getAlgorithm() != Algorithm::EltwiseSubtract) { return false; + } - if (parent0->getParentEdges().size() != 2) + if (parent0->getParentEdges().size() != 2) { return false; + } auto subtractArg1 = parent0->getParentEdgeAt(1)->getParent(); - if (subtractArg1->getType() != Type::Input || !subtractArg1->isConstant()) + if (subtractArg1->getType() != Type::Input || !subtractArg1->isConstant()) { return false; + } - if (subtractArg1->getOriginalOutputPrecisionAtPort(0) != ov::element::u8) + if (subtractArg1->getOriginalOutputPrecisionAtPort(0) != ov::element::u8) { return false; + } if (parent0->getInputShapeAtPort(1).getRank() < 2) { return false; } auto zpDims = parent0->getInputShapeAtPort(1).getDims(); - if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) + if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) { return false; + } for (size_t i = 2; i < zpDims.size(); i++) { - if (zpDims[i] != 1) + if (zpDims[i] != 1) { return false; + } } const auto& parentEdge = parent0->getParentEdgeAt(0); const auto& subtractArg0 = parentEdge->getParent(); const size_t portNum = parentEdge->getInputNum(); - if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != ov::element::u8) + if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != ov::element::u8) { return false; + } auto zeroPointsConstant = dynamic_cast(subtractArg1.get()); - if (zeroPointsConstant == nullptr) + if (zeroPointsConstant == nullptr) { OPENVINO_THROW("Cannot cast to Input node"); + } auto zeroPointsBlob = zeroPointsConstant->getMemoryPtr(); - if (zeroPointsBlob == nullptr) + if (zeroPointsBlob == nullptr) { OPENVINO_THROW("Cannot cast to TBlob internal zero points blob"); + } auto zeroPointsData = 
static_cast(zeroPointsBlob->getData()); - if (zeroPointsData == nullptr) + if (zeroPointsData == nullptr) { OPENVINO_THROW("zeroPointsBlob has not allocated buffer"); + } auto zeroPointDataSize = parent0->getInputShapeAtPort(1).getDims()[1]; if (Shape::UNDEFINED_DIM == zeroPointDataSize) { @@ -882,25 +943,31 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) { auto initializeOutputCompensation = [](const NodePtr& node) { auto* convNode = dynamic_cast(node.get()); - if (convNode == nullptr) + if (convNode == nullptr) { OPENVINO_THROW("Cannot get convolution node ", node->getName()); + } - if (convNode->legacyInputZeroPoints.empty()) + if (convNode->legacyInputZeroPoints.empty()) { return; - if (convNode->legacyOutputCompensation.empty()) + } + if (convNode->legacyOutputCompensation.empty()) { convNode->legacyOutputCompensation.resize(convNode->getOutputShapeAtPort(0).getDims()[1]); + } auto weightsConstant = dynamic_cast(convNode->getParentEdgeAt(1)->getParent().get()); - if (!weightsConstant || !weightsConstant->isConstant()) + if (!weightsConstant || !weightsConstant->isConstant()) { return; + } auto weightsBlob = weightsConstant->getMemoryPtr(); - if (weightsBlob == nullptr) + if (weightsBlob == nullptr) { OPENVINO_THROW("Cannot cast to TBlob internal weights blob"); + } auto weightsPtr = static_cast(weightsBlob->getData()); - if (weightsPtr == nullptr) + if (weightsPtr == nullptr) { OPENVINO_THROW("weightsBlob has not allocated buffer"); + } auto G = convNode->getGroupNum(); const size_t groupOffset = convNode->getAlgorithm() == Algorithm::ConvolutionGrouped ? 
1 : 0; @@ -946,8 +1013,9 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) { for (size_t i = 0; i < graphNodes.size(); i++) { auto conv = graphNodes[i]; - if (!isSuitableConvNode(conv)) + if (!isSuitableConvNode(conv)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FuseConvolutionAndZeroPoints_ConvNode); @@ -995,8 +1063,9 @@ void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::FullyConnected) + if (p_edge->getParent()->getType() == Type::FullyConnected) { continue; + } graph.RemoveEdge(p_edge); } @@ -1035,8 +1104,9 @@ void GraphOptimizer::FuseMatMulAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::MatMul) + if (p_edge->getParent()->getType() == Type::MatMul) { continue; + } graph.RemoveEdge(p_edge); } @@ -1059,18 +1129,22 @@ void GraphOptimizer::FuseConvolutionAndDWConvolution(Graph& graph) { }; auto isSuitableParentConvolution = [&](const NodePtr& node) { - if (node->isDropped()) + if (node->isDropped()) { return false; + } - if (node->isDynamicNode()) + if (node->isDynamicNode()) { return false; + } const auto conv = std::dynamic_pointer_cast(node); - if (conv == nullptr) + if (conv == nullptr) { OPENVINO_THROW("Cannot cast to convolution node ", node->getName()); + } - if (!conv->legacyWeightsZeroPoints.empty()) + if (!conv->legacyWeightsZeroPoints.empty()) { return false; + } const auto& strides = conv->getStride(); const auto& paddings = conv->getPaddingL(); @@ -1088,32 +1162,38 @@ void GraphOptimizer::FuseConvolutionAndDWConvolution(Graph& graph) { static_cast(paddings[paddings.size() - 1]), static_cast(paddings[paddings.size() - 2])) && !conv->canBeExecutedInInt8(); - if (!isSupportedParams) + if (!isSupportedParams) { 
return false; + } return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild()); }; auto isSuitableChildConvolution = [&](const NodePtr& parentNode, const NodePtr& childNode) { - if (parentNode->isDropped() || childNode->isDropped()) + if (parentNode->isDropped() || childNode->isDropped()) { return false; + } - if (childNode->isDynamicNode()) + if (childNode->isDynamicNode()) { return false; + } const auto convChild = std::dynamic_pointer_cast(childNode); - if (convChild == nullptr) + if (convChild == nullptr) { OPENVINO_THROW("Cannot cast to convolution node ", childNode->getName()); + } const auto convParent = std::dynamic_pointer_cast(parentNode); - if (convParent == nullptr) + if (convParent == nullptr) { OPENVINO_THROW("Cannot cast to convolution node ", parentNode->getName()); + } if (!everyone_is(ov::element::f32, convParent->getOriginalOutputPrecisionAtPort(0), convChild->getOriginalInputPrecisionAtPort(0), - convChild->getOriginalOutputPrecisionAtPort(0))) + convChild->getOriginalOutputPrecisionAtPort(0))) { return false; + } auto parentOutputPrecision = !parentNode->fusedWith.empty() @@ -1125,11 +1205,13 @@ void GraphOptimizer::FuseConvolutionAndDWConvolution(Graph& graph) { ? 
childNode->fusedWith[childNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) : childNode->getOriginalOutputPrecisionAtPort(0); - if (!everyone_is(ov::element::f32, parentOutputPrecision, childOutputPrecision)) + if (!everyone_is(ov::element::f32, parentOutputPrecision, childOutputPrecision)) { return false; + } - if (!convChild->legacyInputZeroPoints.empty() || !convChild->legacyWeightsZeroPoints.empty()) + if (!convChild->legacyInputZeroPoints.empty() || !convChild->legacyWeightsZeroPoints.empty()) { return false; + } bool withBias = convChild->getOriginalInputPrecisions().size() == 3; @@ -1171,33 +1253,39 @@ void GraphOptimizer::FuseConvolutionAndDWConvolution(Graph& graph) { int dw_conv_output_size = outDims[0] * outDims[1] * outDims[2] * outDims[3] * elemSize; auto parentConvolutionNode = std::dynamic_pointer_cast(parentNode); - if (parentConvolutionNode == nullptr) + if (parentConvolutionNode == nullptr) { OPENVINO_THROW("Cannot get convolution node ", parentNode->getName()); + } - if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) + if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) { return false; + } return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2); }; for (size_t i = 0; i < graphNodes.size(); i++) { - if (!isConvolutionNode(graphNodes[i])) + if (!isConvolutionNode(graphNodes[i])) { continue; + } auto parentConvNode = graphNodes[i]; - if (!isSuitableParentConvolution(parentConvNode)) + if (!isSuitableParentConvolution(parentConvNode)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FuseConvolutionAndDWConvolution_ParentConv); auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild(); - if (!isSuitableChildConvolution(parentConvNode, childConvNode)) + if (!isSuitableChildConvolution(parentConvNode, childConvNode)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FuseConvolutionAndDWConvolution_ChildConv); - if 
(!isFusingWorthwhile(parentConvNode, childConvNode)) + if (!isFusingWorthwhile(parentConvNode, childConvNode)) { continue; + } parentConvNode->addFusedNode(childConvNode); @@ -1257,8 +1345,9 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(Graph& grap auto parentEdges = fuseCandidate->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent() == childNode) + if (p_edge->getParent() == childNode) { continue; + } graph.RemoveEdge(p_edge); } @@ -1298,8 +1387,9 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == parentNodeType) + if (p_edge->getParent()->getType() == parentNodeType) { continue; + } graph.RemoveEdge(p_edge); } @@ -1314,8 +1404,9 @@ void GraphOptimizer::FusePoolingAndFakeQuantize(Graph& graph) { auto isSuitableParentNode = [](const NodePtr& node) { if (node->getType() == Type::Pooling) { - if (!one_of(node->getOriginalInputPrecisionAtPort(0), ov::element::u8, ov::element::i8)) + if (!one_of(node->getOriginalInputPrecisionAtPort(0), ov::element::u8, ov::element::i8)) { return false; + } return node->getChildEdges().size() == 1 && node->getAlgorithm() == Algorithm::PoolingAvg; } return false; @@ -1327,14 +1418,16 @@ void GraphOptimizer::FusePoolingAndFakeQuantize(Graph& graph) { for (size_t i = 0; i < graphNodes.size(); i++) { auto parent = graphNodes[i]; - if (!isSuitableParentNode(parent)) + if (!isSuitableParentNode(parent)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FusePoolingAndFakeQuantize_ParentNode); auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSuitableChildNode(child)) + if (!isSuitableChildNode(child)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FusePoolingAndFakeQuantize_ChildNode); @@ -1343,8 +1436,9 @@ void GraphOptimizer::FusePoolingAndFakeQuantize(Graph& graph) { auto parents = 
child->parentEdges; for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (p_edge->getParent()->getType() == Type::Pooling) + if (p_edge->getParent()->getType() == Type::Pooling) { continue; + } graph.RemoveEdge(p_edge); } @@ -1367,8 +1461,9 @@ static bool is_data_dependency(const std::shared_ptr& parent, const std::s for (; !nextLayers.empty();) { auto layer = *nextLayers.begin(); - if (layer == child.get()) + if (layer == child.get()) { return true; + } for (auto& oe : layer->getChildEdges()) { auto nn = oe.lock()->getChild(); if (visited.find(nn.get()) == visited.end()) { @@ -1433,13 +1528,15 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) for (auto& graphNode : graphNodes) { const auto eltwiseNode = std::dynamic_pointer_cast(graphNode); if (graphNode->getType() != Type::Eltwise || graphNode->getAlgorithm() != Algorithm::EltwiseAdd || - !eltwiseNode || eltwiseNode->isWithBroadcast()) + !eltwiseNode || eltwiseNode->isWithBroadcast()) { continue; + } // TODO: Enlarge to several inputs bool isSuitableNode = graphNode->getParentEdges().size() == 2; - if (!isSuitableNode) + if (!isSuitableNode) { continue; + } auto parent1 = graphNode->getParentEdgeAt(0)->getParent(); auto parent2 = graphNode->getParentEdgeAt(1)->getParent(); @@ -1450,11 +1547,13 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) parent2->getType() == Type::Convolution || parent2->getType() == Type::BinaryConvolution; auto canFuseSum = [](node::BinaryConvolution* binConv, const NodePtr& fuseCandidate) { - if (binConv->getImplType() == impl_desc_type::ref) + if (binConv->getImplType() == impl_desc_type::ref) { return false; + } - if (binConv->isFusedWith(Type::FakeQuantize)) + if (binConv->isFusedWith(Type::FakeQuantize)) { return false; + } if (fuseCandidate->getAlgorithm() == Algorithm::EltwiseAdd) { for (auto& fusedNode : binConv->fusedWith) { @@ -1481,8 +1580,9 @@ void 
GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) auto checkFusedWithSum = [](Convolution* conv) -> bool { for (const auto& node : conv->getFusedWith()) { const auto eltwise = std::dynamic_pointer_cast(node); - if (eltwise && eltwise->isSpecialConvolutionAddFusing()) + if (eltwise && eltwise->isSpecialConvolutionAddFusing()) { return true; + } } return false; }; @@ -1505,8 +1605,9 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) } } - if (!isSuitableParent1 && !isSuitableParent2) + if (!isSuitableParent1 && !isSuitableParent2) { continue; + } std::shared_ptr mergedConv; std::shared_ptr peerNode; @@ -1547,19 +1648,22 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) peerNode = parent1; } } - if (peerNode->isConstant()) + if (peerNode->isConstant()) { continue; + } const auto& sum = graphNode; - if (mergedConv->isConstant() && !sum->isConstant()) + if (mergedConv->isConstant() && !sum->isConstant()) { continue; + } // Disable fusing for Add with broadcasing in case of known data ranges. Add with brodcasting triggers // non-optimal code path inside Convolution node, so better to avoid fusing at all. 
const auto& shape1 = sum->getInputShapeAtPort(0); const auto& shape2 = sum->getInputShapeAtPort(1); - if (shape1.getRank() != shape2.getRank()) + if (shape1.getRank() != shape2.getRank()) { continue; + } const auto& dims1 = shape1.getDims(); const auto& dims2 = shape2.getDims(); @@ -1579,28 +1683,33 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) auto lastNode = sum; bool fuse_allowed = mergedConv->getChildEdges().size() == 1; - for (size_t j = 0; fuse_allowed && j < mergedConv->getParentEdges().size(); j++) - if (mergedConv->getParentEdgeAt(j)->getParent() == peerNode) + for (size_t j = 0; fuse_allowed && j < mergedConv->getParentEdges().size(); j++) { + if (mergedConv->getParentEdgeAt(j)->getParent() == peerNode) { fuse_allowed = false; + } + } // Fused Conv+Sum prim will be used inplace. That's mean that input blob will // be overwritten. Should verify that all other consumer already read it and // we can spoil input data. // TODO: rewrite once we add "Inplace" reporting mechanism for (auto& edge : peerNode->getChildEdges()) { - if (!fuse_allowed) + if (!fuse_allowed) { break; + } fuse_allowed &= is_data_dependency(edge.lock()->getChild(), sum); } - if (!fuse_allowed) + if (!fuse_allowed) { continue; + } if (graphNode->getChildEdges().size() == 1 && isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) { auto relu_shared = graphNode->getChildEdgeAt(0)->getChild(); lastNode = relu_shared; - if (mergedConv->isConstant() && !lastNode->isConstant()) + if (mergedConv->isConstant() && !lastNode->isConstant()) { continue; + } sum->fuseInto(mergedConv); } @@ -1628,12 +1737,14 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph& graph) int childPort = 1; auto* mergedConvNode = dynamic_cast(mergedConv.get()); - if (mergedConvNode != nullptr) + if (mergedConvNode != nullptr) { childPort = mergedConvNode->getParentEdges().size(); + } auto* mergedBinConvNode = dynamic_cast(mergedConv.get()); - if 
(mergedBinConvNode != nullptr) + if (mergedBinConvNode != nullptr) { childPort = mergedBinConvNode->getParentEdges().size(); + } graph.CreateEdge(peerNode, mergedConv, peer_port, childPort); @@ -1687,8 +1798,9 @@ void GraphOptimizer::FuseMVNAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::MVN) + if (p_edge->getParent()->getType() == Type::MVN) { continue; + } graph.RemoveEdge(p_edge); } @@ -1709,12 +1821,14 @@ void GraphOptimizer::FuseInterpolateAndSimpleOperation(Graph& graph) { // Avoid cycle dependencies for (auto& childParentEdge : childNode->getParentEdges()) { for (auto& parentParentEdge : parentNode->getParentEdges()) { - if (childParentEdge.lock()->getParent() == parentParentEdge.lock()->getParent()) + if (childParentEdge.lock()->getParent() == parentParentEdge.lock()->getParent()) { return false; + } } } - if (!childNode->getFusedWith().empty()) + if (!childNode->getFusedWith().empty()) { return false; + } auto interpolateNode = dynamic_cast(parentNode.get()); if (!interpolateNode) { OPENVINO_THROW("Cannot cast ", parentNode->getName(), " to Interpolate"); @@ -1746,8 +1860,9 @@ void GraphOptimizer::FuseInterpolateAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::Interpolate) + if (p_edge->getParent()->getType() == Type::Interpolate) { continue; + } graph.RemoveEdge(p_edge); } @@ -1786,8 +1901,9 @@ void GraphOptimizer::FuseNormalizeL2AndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::NormalizeL2) + if (p_edge->getParent()->getType() == Type::NormalizeL2) { continue; + } graph.RemoveEdge(p_edge); } @@ -1826,10 +1942,12 @@ void 
GraphOptimizer::FuseReduceAndSimpleOperation(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge == nullptr) + if (p_edge == nullptr) { OPENVINO_THROW("Cannot get parent edge ", childNode->getName()); - if (p_edge->getParent()->getType() == Type::Reduce) + } + if (p_edge->getParent()->getType() == Type::Reduce) { continue; + } graph.RemoveEdge(p_edge); } @@ -1847,8 +1965,9 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph& graph) { }; auto isSuitableChildNode = [&](const NodePtr& parentNode, const NodePtr& childNode) { - if (parentNode->isConstant() && !childNode->isConstant()) + if (parentNode->isConstant() && !childNode->isConstant()) { return false; + } for (auto& childParentEdge : childNode->getParentEdges()) { // WA to prevent unsupported reorder exception issue in some cases if (childParentEdge.lock()->getParent()->getType() == Type::Split) { @@ -1857,13 +1976,15 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph& graph) { // Avoid cycle dependencies for (auto& parentParentEdge : parentNode->getParentEdges()) { - if (childParentEdge.lock()->getParent() == parentParentEdge.lock()->getParent()) + if (childParentEdge.lock()->getParent() == parentParentEdge.lock()->getParent()) { return false; + } } } - if (!childNode->getFusedWith().empty()) + if (!childNode->getFusedWith().empty()) { return false; + } return parentNode->canFuse(childNode); }; @@ -1899,8 +2020,9 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph& graph) { auto parentEdges = childNode->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Type::Eltwise) + if (p_edge->getParent()->getType() == Type::Eltwise) { continue; + } graph.RemoveEdge(p_edge); } @@ -1913,19 +2035,23 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph& graph) { for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (!p_edge) + if 
(!p_edge) { continue; + } auto parent = p_edge->getParent(); - if (!parent) + if (!parent) { continue; + } if (parent == parentNode) { for (size_t j = 0; j < children.size(); j++) { - if (!children[j].lock()) + if (!children[j].lock()) { continue; + } auto child = children[j].lock()->getChild(); - if (!child) + if (!child) { continue; + } EdgePtr& remEdge = p_edge; int inNum = 0; @@ -1956,8 +2082,9 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph& graph) { graph.RemoveEdge(remEdge); } - if (parentNode->inputShapes.size() < static_cast(outNum + 1)) + if (parentNode->inputShapes.size() < static_cast(outNum + 1)) { parentNode->inputShapes.resize(outNum + 1); + } parentNode->inputShapes[outNum] = parent->getOutputShapeAtPort(inNum); graph.CreateEdge(parent, parentNode, inNum, outNum); @@ -1973,29 +2100,34 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph& graph) { void GraphOptimizer::ShareReorders(Graph& graph) { auto getSuitableReorder = [](const NodePtr& node) -> Reorder* { - if (node->getType() != Type::Reorder) + if (node->getType() != Type::Reorder) { return nullptr; + } Reorder* reorder = dynamic_cast(node.get()); - if (reorder == nullptr) + if (reorder == nullptr) { OPENVINO_THROW("Cannot get reorder layer ", node->getName()); + } // inplace children cannot be safely shared with each other auto reorderConsumers = reorder->getChildEdgesAtPort(0); if (std::any_of(reorderConsumers.begin(), reorderConsumers.end(), [](const EdgePtr& e) { return e->inPlace(Edge::LOOK_DOWN); - })) + })) { return nullptr; + } return reorder; }; std::set dropped; for (const auto& node : graph.GetNodes()) { - if (dropped.find(node) != dropped.end()) + if (dropped.find(node) != dropped.end()) { continue; + } Reorder* reorder = getSuitableReorder(node); - if (!reorder) + if (!reorder) { continue; + } // find shareable sibling auto dataEdge = reorder->getParentEdgeAt(0); @@ -2003,29 +2135,34 @@ void GraphOptimizer::ShareReorders(Graph& graph) { auto parentPort = dataEdge->getInputNum(); 
for (auto& edge : parentNode->getChildEdgesAtPort(parentPort)) { auto siblingNode = edge->getChild(); - if (siblingNode == node) + if (siblingNode == node) { continue; + } Reorder* siblingReorder = getSuitableReorder(siblingNode); - if (!siblingReorder) + if (!siblingReorder) { continue; - if (!reorder->getOutput().isCompatible(siblingReorder->getOutput())) + } + if (!reorder->getOutput().isCompatible(siblingReorder->getOutput())) { continue; + } DEBUG_LOG(node->getName(), " is shared by ", siblingNode->getName()); // siblingReorder can share output with current reorder for (const auto& pwEdge : siblingReorder->getParentEdges()) { auto pEdge = pwEdge.lock(); - if (pEdge) + if (pEdge) { graph.RemoveEdge(pEdge); + } } for (const auto& pwEdge : siblingReorder->getChildEdges()) { auto pEdge = pwEdge.lock(); if (pEdge) { graph.RemoveEdge(pEdge); - if (pEdge->getInputNum() == 0) + if (pEdge->getInputNum() == 0) { graph.CreateEdge(node, pEdge->getChild(), 0, pEdge->getOutputNum()); + } } } @@ -2044,11 +2181,13 @@ void GraphOptimizer::DropDoubleReorders(Graph& graph) { node->getChildEdges().size() == 1 && node->getChildEdgeAt(0)->getChild()->getType() == Type::Reorder) { auto nextNode = node->getChildEdgeAt(0)->getChild(); Reorder* n = dynamic_cast(node.get()); - if (n == nullptr) + if (n == nullptr) { OPENVINO_THROW("Cannot get reorder layer ", node->getName()); + } Reorder* nn = dynamic_cast(nextNode.get()); - if (nn == nullptr) + if (nn == nullptr) { OPENVINO_THROW("Cannot get reorder layer ", nextNode->getName()); + } NodePtr p = n->getParentEdgeAt(0)->getParent(); NodePtr c = nn->getChildEdgeAt(0)->getChild(); @@ -2063,11 +2202,13 @@ void GraphOptimizer::DropDoubleReorders(Graph& graph) { EdgePtr edge; for (auto& cur : p->getChildEdgesAtPort(oldEdgeNum)) { - if (cur->getChild() == c) + if (cur->getChild() == c) { edge = cur; + } } - if (!edge) + if (!edge) { OPENVINO_THROW("Inappropriate graph processing"); + } std::string layerName = edge->getParent()->getName() + 
"_ScaleReorder_" + edge->getChild()->getName(); graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false); @@ -2090,22 +2231,26 @@ void GraphOptimizer::FuseClampAndFakeQuantize(Graph& graph) { auto fuseClampAndFakeQuantizeNodes = [](const NodePtr& parent, const NodePtr& child) { auto* eltwiseNode = dynamic_cast(parent.get()); - if (eltwiseNode == nullptr) + if (eltwiseNode == nullptr) { OPENVINO_THROW("Cannot cast ", parent->getName(), " to Eltwise node"); + } auto* fakeQuantizeNode = dynamic_cast(child.get()); - if (fakeQuantizeNode == nullptr) + if (fakeQuantizeNode == nullptr) { OPENVINO_THROW("Cannot cast ", child->getName(), " to FakeQuantize node"); + } const std::vector& cropLowData = fakeQuantizeNode->getCropLow(); const std::vector& cropHighData = fakeQuantizeNode->getCropHigh(); std::vector newCropLow(cropLowData.size()); std::vector newCropHigh(cropHighData.size()); - for (size_t i = 0; i < cropLowData.size(); i++) + for (size_t i = 0; i < cropLowData.size(); i++) { newCropLow[i] = std::max(cropLowData[i], eltwiseNode->getAlpha()); - for (size_t i = 0; i < cropHighData.size(); i++) + } + for (size_t i = 0; i < cropHighData.size(); i++) { newCropHigh[i] = std::min(cropHighData[i], eltwiseNode->getBeta()); + } fakeQuantizeNode->setCropLow(newCropLow); fakeQuantizeNode->setCropHigh(newCropHigh); @@ -2115,14 +2260,16 @@ void GraphOptimizer::FuseClampAndFakeQuantize(Graph& graph) { for (size_t i = 0; i < graphNodes.size(); i++) { auto parent = graphNodes[i]; - if (!isSuitableClampNode(parent)) + if (!isSuitableClampNode(parent)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FuseClampAndFakeQuantize_ClalmpNode); auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSuitableFakeQuantizeNode(child)) + if (!isSuitableFakeQuantizeNode(child)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FuseClampAndFakeQuantize_QuantizeNode); @@ -2139,12 +2286,14 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph& graph) { std::vector 
nonConstPorts; for (size_t i = 0; i < node->getParentEdges().size(); i++) { const auto& parent = node->getParentEdgeAt(i)->getParent(); - if (!(parent->getType() == Type::Input && parent->isConstant())) + if (!(parent->getType() == Type::Input && parent->isConstant())) { nonConstPorts.push_back(i); + } } // there are more than 1 nonconst port or missed - if (nonConstPorts.size() != 1) + if (nonConstPorts.size() != 1) { return -1; + } return nonConstPorts[0]; }; @@ -2155,12 +2304,14 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph& graph) { Algorithm::EltwiseSubtract, Algorithm::EltwiseMultiply, Algorithm::EltwiseDivide, - Algorithm::EltwiseMulAdd)) + Algorithm::EltwiseMulAdd)) { return false; + } const auto nonConstPort = getNonConstPort(node); - if (nonConstPort == -1) + if (nonConstPort == -1) { return false; + } const NodePtr eltwiseInput = node->getParentEdgeAt(nonConstPort)->getParent(); return node->getChildEdges().size() == 1 && node->canBePerformedAsScaleShift(eltwiseInput.get()); @@ -2172,8 +2323,9 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph& graph) { auto fuseScaleShiftAndFakeQuantizeNodes = [getNonConstPort](const NodePtr& parent, const NodePtr& child) { auto fakeQuantizeNode = std::dynamic_pointer_cast(child); - if (fakeQuantizeNode == nullptr) + if (fakeQuantizeNode == nullptr) { OPENVINO_THROW("Cannot cast ", child->getName(), " to FakeQuantize node"); + } std::vector scalesBuffer; std::vector shiftsBuffer; @@ -2212,9 +2364,11 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph& graph) { scalesBuffer = makeAlignedBuffer(outputDims[channelPos], scalesBuffer, 1); shiftsBuffer = makeAlignedBuffer(outputDims[channelPos], shiftsBuffer, 1); - for (size_t i = 0; i < scalesBuffer.size(); i++) - if (scalesBuffer[i] == 0.f) + for (size_t i = 0; i < scalesBuffer.size(); i++) { + if (scalesBuffer[i] == 0.f) { return false; + } + } const std::vector& cropLowData = fakeQuantizeNode->getCropLow(); 
const std::vector& cropHighData = fakeQuantizeNode->getCropHigh(); @@ -2288,14 +2442,16 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph& graph) { for (size_t i = 0; i < graphNodes.size(); i++) { auto parent = graphNodes[i]; - if (!isSuitableScaleShiftNode(parent)) + if (!isSuitableScaleShiftNode(parent)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FusePerformedAsScaleShiftAndFakeQuantize_ShiftNode); auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSuitableFakeQuantizeNode(child)) + if (!isSuitableFakeQuantizeNode(child)) { continue; + } CPU_GRAPH_OPTIMIZER_SCOPE(FusePerformedAsScaleShiftAndFakeQuantize_QuantizeNode); @@ -2303,8 +2459,9 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph& graph) { auto parentEdges = parent->parentEdges; for (auto& parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (!p_edge->getParent()->isConstant()) + if (!p_edge->getParent()->isConstant()) { continue; + } graph.RemoveEdge(p_edge); } @@ -2383,8 +2540,9 @@ void GraphOptimizer::mergeTransposeReshapeReorder(Graph& graph, auto nodeAfterSequence = childNode->getChildEdgeAt(0)->getChild(); graph.RemoveEdge(transposeNode->getParentEdgeAt(1)); - if (reshapeNode) + if (reshapeNode) { graph.RemoveEdge(reshapeNode->getParentEdgeAt(1)); + } // To prevent inPlace conflict, we must check that the memory reference is unidirectional // or inPlace memory is not used @@ -2393,24 +2551,28 @@ void GraphOptimizer::mergeTransposeReshapeReorder(Graph& graph, // hold references to all children before dropping reorder_node std::vector> reorderChildren; - for (const auto& ccEdge : childNode->getChildEdgesAtPort(0)) + for (const auto& ccEdge : childNode->getChildEdgesAtPort(0)) { reorderChildren.emplace_back(ccEdge->getChild(), ccEdge->getOutputNum()); + } // detach nodes from graph by remove all of their edges // they will be removed in future graph.RemoveDroppedNodes() call auto detachNode = [&](const std::shared_ptr& node) { std::vector 
edges; edges = node->getParentEdges(); - for (auto& edge : edges) + for (auto& edge : edges) { graph.RemoveEdge(edge.lock()); + } edges = node->getChildEdges(); - for (auto& edge : edges) + for (auto& edge : edges) { graph.RemoveEdge(edge.lock()); + } }; detachNode(transposeNode); detachNode(reorderNode); - if (reshapeNode) + if (reshapeNode) { detachNode(reshapeNode); + } auto reorderInDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc(); auto finalDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].getMemDesc(); @@ -2448,8 +2610,9 @@ void GraphOptimizer::mergeTransposeReshapeReorder(Graph& graph, std::string reorderName = nodeBeforeSequence->getName() + "_" + Reorder::getReorderArgs(*reorderInDesc, *reorderOutDesc); - if (isOptimized) + if (isOptimized) { reorderName += "_fake"; + } DEBUG_LOG("mergeTransposeAndReorder ", parentNode->getName(), " and ", childNode->getName(), " -> ", reorderName); auto reorder_layout = std::make_shared(*reorderInDesc, *reorderOutDesc, reorderName, graph.getGraphContext()); @@ -2472,8 +2635,9 @@ void GraphOptimizer::mergeTransposeReshapeReorder(Graph& graph, graph.CreateEdge(reorder_layout, reorder_last, 0, 0); } - for (auto& cc : reorderChildren) + for (auto& cc : reorderChildren) { graph.CreateEdge(reorder_last, cc.first, 0, cc.second); + } // initialize and add nodes into graph std::vector new_nodes; @@ -2481,23 +2645,28 @@ void GraphOptimizer::mergeTransposeReshapeReorder(Graph& graph, if (reorder_last != reorder_layout) { new_nodes.push_back(reorder_last); } - for (auto& node : new_nodes) + for (auto& node : new_nodes) { graph.AddNode(node); + } // multiple nodes must be initialized in specific order - for (auto& node : new_nodes) + for (auto& node : new_nodes) { node->init(); + } for (auto& node : new_nodes) { node->getSupportedDescriptors(); node->initSupportedPrimitiveDescriptors(); node->filterSupportedPrimitiveDescriptors(); } - for (auto& node : new_nodes) + for 
(auto& node : new_nodes) { node->selectOptimalPrimitiveDescriptor(); - for (auto& node : new_nodes) + } + for (auto& node : new_nodes) { node->resolveInPlaceDirection(); - for (auto& node : new_nodes) + } + for (auto& node : new_nodes) { node->initOptimalPrimitiveDescriptor(); + } } void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) { @@ -2528,18 +2697,21 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) { auto isSuitableReshape = [](const NodePtr& node) { if (node->getChildEdges().size() != 1 || node->getOutputShapeAtPort(0).isDynamic() || - node->getInputShapeAtPort(0).isDynamic()) + node->getInputShapeAtPort(0).isDynamic()) { return false; + } // Reshape supported only in one case: if one of the input dims is split into 2 consecutive dims const auto& inDims = node->getInputShapeAtPort(0).getDims(); const auto& outDims = node->getOutputShapeAtPort(0).getDims(); - if (outDims.size() - inDims.size() != 1) + if (outDims.size() - inDims.size() != 1) { return false; + } size_t mismatchCount = 0; for (size_t i = 0; i < inDims.size(); ++i) { - if (i + mismatchCount >= outDims.size()) + if (i + mismatchCount >= outDims.size()) { return false; + } if (inDims[i] != outDims[i + mismatchCount]) { mismatchCount++; } @@ -2554,8 +2726,9 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) { }; auto updateOrder = [](const VectorDims& originalOrder, const NodePtr& reshape) { - if (!reshape) + if (!reshape) { return originalOrder; + } // Further logic works with transpose order without Reshape. 
// If there is a Reshape node, which splits one of the dimensions into 2 consecutive ones, @@ -2571,8 +2744,9 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) { for (size_t i = 0; i < reshapeInShape.size(); ++i) { if (reshapeInShape[i] != reshapeOutShape[i]) { for (size_t j = 0; j < originalOrder.size(); ++j) { - if (originalOrder[j] == i) + if (originalOrder[j] == i) { return j; + } } } } @@ -2659,18 +2833,21 @@ void GraphOptimizer::MergeReorderAndTranspose(Graph& graph) { auto isSuitableReshape = [](const NodePtr& node) { if (node->getChildEdges().size() != 1 || node->getOutputShapeAtPort(0).isDynamic() || - node->getInputShapeAtPort(0).isDynamic()) + node->getInputShapeAtPort(0).isDynamic()) { return false; + } // Reshape supported only in one case: if two consecutive input dims are merged into 1 const auto& inShape = node->getInputShapeAtPort(0).getDims(); const auto& outShape = node->getOutputShapeAtPort(0).getDims(); - if (inShape.size() - outShape.size() != 1) + if (inShape.size() - outShape.size() != 1) { return false; + } size_t mismatchCount = 0; for (size_t i = 0; i < outShape.size(); ++i) { - if (i + mismatchCount >= inShape.size()) + if (i + mismatchCount >= inShape.size()) { return false; + } if (outShape[i] != inShape[i + mismatchCount]) { mismatchCount++; } @@ -2683,8 +2860,9 @@ void GraphOptimizer::MergeReorderAndTranspose(Graph& graph) { }; auto updateOrder = [](const VectorDims& originalOrder, const NodePtr& reshape) { - if (!reshape) + if (!reshape) { return originalOrder; + } // Further logic works with order without Reshape. 
// If there is Reshape node which merges 2 consecutive dims into one, @@ -2781,8 +2959,9 @@ void GraphOptimizer::reshapeRnnSeq(Graph& graph) { auto& graphNodes = graph.GetNodes(); auto isSuitableParentNode = [](const NodePtr& node) { - if (node->type != Type::RNNSeq) + if (node->type != Type::RNNSeq) { return false; + } auto rnnNode = std::dynamic_pointer_cast(node); return rnnNode && !rnnNode->hasNativeOrder() && node->outputShapes[0].getRank() == 4 && node->outputShapes[0].getDims()[1] == 1; diff --git a/src/plugins/intel_cpu/src/hash_builder.hpp b/src/plugins/intel_cpu/src/hash_builder.hpp index 5aa87cae2ac0b1..c17823661a95fd 100644 --- a/src/plugins/intel_cpu/src/hash_builder.hpp +++ b/src/plugins/intel_cpu/src/hash_builder.hpp @@ -30,8 +30,9 @@ size_t combine(size_t seed, const T& v) { template size_t combine(size_t seed, const std::vector& v) { - for (const auto& elem : v) + for (const auto& elem : v) { seed = combine(seed, elem); + } return seed; } diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index d3b0b4c534be2a..4281923e2959b2 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -125,8 +125,9 @@ void SyncInferRequest::infer() { std::vector SyncInferRequest::get_profiling_info() const { auto&& graph = m_compiled_model.graph(); - if (!graph.IsReady()) + if (!graph.IsReady()) { OPENVINO_THROW("Graph is not ready!"); + } std::vector perfMap; graph.GetPerfData(perfMap); return perfMap; @@ -164,15 +165,17 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { for (auto& it : m_input_external_ptr) { auto inputNodePtr = graph.getInputNodeByIndex(it.first); OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", it.first); - if (inputNodePtr->getDstDataAtPort(0) == static_cast(it.second->data())) + if (inputNodePtr->getDstDataAtPort(0) == static_cast(it.second->data())) { continue; + } auto& childEdges = 
inputNodePtr->getChildEdges(); // Perform checks that the user's memory will not be modified bool canBeInPlace = true; for (auto& childEdge : childEdges) { auto ce = childEdge.lock(); - if (!ce) + if (!ce) { OPENVINO_THROW("Node ", inputNodePtr->getName(), " contains empty child edge"); + } auto& child = ce->getChild(); @@ -201,8 +204,9 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { if (canBeInPlace) { for (auto& edge : childEdges) { auto e = edge.lock(); - if (!e) + if (!e) { OPENVINO_THROW("Node ", inputNodePtr->getName(), " contains empty child edge"); + } changeInpPtr(e, it.second); } } @@ -213,8 +217,9 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { OPENVINO_ASSERT(output, "Cannot find output tensor with index: ", it.first); auto parentEdge = output->getParentEdgeAt(0); void* const outputRawPtr = parentEdge->getMemory().getData(); - if (outputRawPtr == static_cast(it.second->data())) + if (outputRawPtr == static_cast(it.second->data())) { continue; + } bool canBeInPlace = true; // Cannot be in-place after concat because concat is using different ptrs without offsets @@ -235,8 +240,9 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { auto& parentEdges = parent->getParentEdges(); for (auto& edge : parentEdges) { auto e = edge.lock(); - if (!e) + if (!e) { OPENVINO_THROW("Node ", parent->getName(), " contains empty parent edge"); + } if (parent_port == parent->inPlaceInputPort(e->getOutputNum())) { parent = e->getParent(); @@ -245,8 +251,9 @@ void SyncInferRequest::change_default_ptr(Graph& graph) { } } } while (previousParent != parent); - if (canBeInPlace) + if (canBeInPlace) { change_edge_ptr(parentEdge, it.second); + } } if (graph.IsDynamic()) { @@ -335,8 +342,9 @@ const ov::Output& SyncInferRequest::get_internal_port(const ov:: void SyncInferRequest::set_tensor(const ov::Output& in_port, const ov::SoPtr& in_tensor) { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "set_tensor"); - if (!in_tensor) + if (!in_tensor) { 
OPENVINO_THROW("Failed to set empty tensor for port!"); + } auto port = get_internal_port(in_port); auto tensor = in_tensor; @@ -546,8 +554,9 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn control_block.tensor()->get_memory().get()); tensor = control_block.tensor(); - if (model_prec == graph_prec) + if (model_prec == graph_prec) { m_outputControlBlocks.emplace(std::make_pair(port_index, std::move(control_block))); + } } } else { tensor_shape = shape.to_shape(); diff --git a/src/plugins/intel_cpu/src/memory_control.cpp b/src/plugins/intel_cpu/src/memory_control.cpp index 757e3659c076d4..42a453c5b4d760 100644 --- a/src/plugins/intel_cpu/src/memory_control.cpp +++ b/src/plugins/intel_cpu/src/memory_control.cpp @@ -154,12 +154,14 @@ class MemoryManagerStatic : public IMemoryManager { } void allocate() override { - if (m_workspace) + if (m_workspace) { m_workspace->resize(m_totalSize); + } } void release() override { - if (m_workspace) + if (m_workspace) { m_workspace->free(); + } } private: @@ -377,8 +379,9 @@ edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEd for (auto& edge : graphEdges) { auto edge_it = edge_cluster_indices.find(edge); - if (edge_it != edge_cluster_indices.end()) + if (edge_it != edge_cluster_indices.end()) { continue; // edge is visited + } size_t cluster_idx = edge_clusters.size(); EdgePtr last_shared_edge = nullptr; @@ -397,10 +400,11 @@ edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEd // add shared edges to cluster edge_cluster_indices.emplace(edge, cluster_idx); - if (cluster_idx == edge_clusters.size()) + if (cluster_idx == edge_clusters.size()) { edge_clusters.emplace_back(edgeCluster{edge}); - else + } else { edge_clusters[cluster_idx].emplace(edge); + } for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge != last_shared_edge; shared_edge = shared_edge->getSharedEdge(std::nothrow)) { diff --git 
a/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp index 6bdba27f72f63d..1ac24218d5435c 100644 --- a/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp @@ -18,8 +18,9 @@ constexpr BlockedMemoryDesc::CmpMask BlockedMemoryDesc::SKIP_OFFSET_MASK; constexpr size_t BlockedMemoryDesc::OFFSET_MASK_POS; bool BlockedMemoryDesc::isCompatibleInternal(const BlockedMemoryDesc& rhs, CmpMask cmpMask) const { - if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { return false; + } if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { return false; @@ -32,12 +33,14 @@ bool BlockedMemoryDesc::isCompatibleInternal(const BlockedMemoryDesc& rhs, CmpMa auto& thisStrides = this->getStrides(); auto& rhsStrides = rhs.getStrides(); - if (thisStrides.size() != rhsStrides.size()) + if (thisStrides.size() != rhsStrides.size()) { return false; + } for (size_t i = 0; i < thisStrides.size(); i++) { - if (cmpMask.test(i) && !dimsEqualWeak(thisStrides[i], rhsStrides[i])) + if (cmpMask.test(i) && !dimsEqualWeak(thisStrides[i], rhsStrides[i])) { return false; + } } if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { @@ -71,7 +74,7 @@ std::string BlockedMemoryDesc::serializeFormat() const { const auto& blkDims = getBlockDims(); for (size_t i = shape.getRank(); i < order.size(); ++i) { - result << blkDims[i] << char(startLetter + order[i]); + result << blkDims[i] << static_cast(startLetter + order[i]); } return result.str(); diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp index b6c80cdd92319e..7f978549e76fe1 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp +++ 
b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp @@ -142,8 +142,9 @@ size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const { auto e_size = getOffsetPadding(); // size in bytes (from begin of data to last element) if (!getShape().hasZeroDims()) { e_size += 1; - for (size_t j = 0; j < getBlockDims().size(); j++) + for (size_t j = 0; j < getBlockDims().size(); j++) { e_size += (getBlockDims()[j] - 1) * getStrides()[j]; + } } const auto prc = getPrecision(); @@ -288,11 +289,13 @@ MemoryDescPtr CpuBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims& dims) // TODO [DS]: add stride recalculation for strided blobs for (int i = strides.size() - 2; i >= 0; i--) { - if (strides[i] == Shape::UNDEFINED_DIM) + if (strides[i] == Shape::UNDEFINED_DIM) { break; + } - if (strides[i] != strides[i + 1] * blockedDims[i + 1]) + if (strides[i] != strides[i + 1] * blockedDims[i + 1]) { OPENVINO_THROW_NOT_IMPLEMENTED("Can't clone desc with new dims for not dense tensor"); + } } VectorDims newBlockedDims(order.size()); @@ -329,16 +332,18 @@ bool CpuBlockedMemoryDesc::blocksExtended() const { size_t idx = order[i]; Dim paddedDim = 1; for (size_t j = rank; j < order.size(); j++) { - if (order[j] == idx) + if (order[j] == idx) { paddedDim *= blockedDims[j]; + } } if (blockedDims[idx] == Shape::UNDEFINED_DIM) { paddedDim = Shape::UNDEFINED_DIM; } else { paddedDim *= blockedDims[idx]; } - if (paddedDim != shape.getDims()[idx]) + if (paddedDim != shape.getDims()[idx]) { return true; + } } return false; } diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp index 69b070e165df8b..871c134cd04d55 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp @@ -132,8 +132,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(ov::element::Type prc, auto lastIter = order.begin() + outer_ndims; 
for (size_t dim = 0; dim < outer_ndims; dim++) { - if (std::find(order.begin(), lastIter, dim) == lastIter) + if (std::find(order.begin(), lastIter, dim) == lastIter) { OPENVINO_THROW("Can not construct DnnlBlockedMemoryDesc because of incorrect order: ", vec2str(order)); + } } size_t inner_ndims = order.size() - dims.size(); @@ -153,8 +154,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(ov::element::Type prc, // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims // and may be we can achieve correct "descending strides" form which allow conversion. - if (!is_descending_strides) + if (!is_descending_strides) { OPENVINO_THROW("Can not construct DnnlBlockedMemoryDesc from strides: ", vec2str(strides)); + } } if (!strides.empty() && !emptyDesc && std::none_of(strides.begin(), strides.end(), [](size_t x) { @@ -165,10 +167,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(ov::element::Type prc, inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]); } - if (!inner_block_are_dense) + if (!inner_block_are_dense) { OPENVINO_THROW("Can not construct DnnlBlockedMemoryDesc from strides: ", vec2str(strides), " inner blocks are not dense."); + } } // Fill general memory desc fields @@ -185,9 +188,10 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(ov::element::Type prc, return pad == 0; }); - if (!inner_pad_offsets_is_zero) + if (!inner_pad_offsets_is_zero) { OPENVINO_THROW("Can not construct DnnlBlockedMemoryDesc, inner pad offsets is not zero: ", vec2str(offsetPaddingToData)); + } auto dnnlPaddedOffsets = DnnlExtensionUtils::convertToDnnlDims(offsetPaddingToData); std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.get()->padded_offsets); } else { @@ -232,8 +236,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, dnnl::memory::format_tag format) : MemoryDesc(shape, DnnlBlocked) { using namespace dnnl; - if (format == memory::format_tag::any || format == 
memory::format_tag::undef) + if (format == memory::format_tag::any || format == memory::format_tag::undef) { OPENVINO_THROW("Unexpected: Can't create dnnl::desc with any or undef format"); + } const auto& dims = shape.getDims(); if (format == memory::format_tag::x && shape.getRank() == 0) { @@ -301,8 +306,9 @@ bool DnnlBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc& rhs, CmpMa return true; } - if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) + if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) { return false; + } const uint64_t stride_mask = (0xffffffffffffffff << cmpMask.size()) | cmpMask.to_ullong(); const bool checkOffset = cmpMask.test(OFFSET_MASK_POS); @@ -360,12 +366,14 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const_dnnl_memory_desc_t cdesc) : MemoryDesc(DnnlExtensionUtils::convertToVectorDims(cdesc->dims, cdesc->ndims), DnnlBlocked) { desc = dnnl::memory::desc(DnnlExtensionUtils::clone_desc(cdesc)); - if (desc.get_format_kind() == dnnl::memory::format_kind::any) + if (desc.get_format_kind() == dnnl::memory::format_kind::any) { OPENVINO_THROW("Unexpected: Memory format any is prohibited!"); + } dnnl::impl::memory_desc_wrapper descWrapped(desc.get()); - if (!descWrapped.is_blocking_desc()) + if (!descWrapped.is_blocking_desc()) { OPENVINO_THROW("Unexpected: Can't create DnnlBlockedMemoryDesc from not blocking desc"); + } order = extractOrder(desc); @@ -406,8 +414,9 @@ bool DnnlBlockedMemoryDesc::isPlainFormat() const { bool DnnlBlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { if (desc.get_format_kind() != dnnl::memory::format_kind::blocked || desc.get_inner_nblks() != 1 || - desc.get_inner_idxs()[0] != 1) + desc.get_inner_idxs()[0] != 1) { return false; + } if ((order.size() - shape.getRank()) != 1) { return false; @@ -483,11 +492,13 @@ MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims& dims) // TODO [DS]: add stride recalculation for strided blobs for 
(int i = strides.size() - 2; i >= 0; i--) { - if (strides[i] == Shape::UNDEFINED_DIM) + if (strides[i] == Shape::UNDEFINED_DIM) { break; + } - if (strides[i] != strides[i + 1] * blockedDims[i + 1]) + if (strides[i] != strides[i + 1] * blockedDims[i + 1]) { OPENVINO_THROW_NOT_IMPLEMENTED("Can't clone desc with new dims for not dense tensor"); + } } return DnnlBlockedMemoryDescPtr(new DnnlBlockedMemoryDesc(cloneDescWithNewDims(desc, dims, order).get())); @@ -496,25 +507,32 @@ MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims& dims) bool DnnlBlockedMemoryDesc::isSame(dnnl::memory::format_tag fmt) const { dnnl::memory::desc refDesc(desc.get_dims(), desc.get_data_type(), fmt); - if (desc.get_ndims() != refDesc.get_ndims()) + if (desc.get_ndims() != refDesc.get_ndims()) { return false; + } if (desc.get_format_kind() != dnnl::memory::format_kind::blocked || - refDesc.get_format_kind() != dnnl::memory::format_kind::blocked) + refDesc.get_format_kind() != dnnl::memory::format_kind::blocked) { OPENVINO_THROW("DnnlMemoryDesc::isSame is not implemented for non blocked memory format"); + } auto actualBlkDesc = desc.get()->format_desc.blocking; auto refBlkDesc = refDesc.get()->format_desc.blocking; - if (desc.get_inner_nblks() != refBlkDesc.inner_nblks) + if (desc.get_inner_nblks() != refBlkDesc.inner_nblks) { return false; + } - for (int i = 0; i < actualBlkDesc.inner_nblks; ++i) - if (actualBlkDesc.inner_blks[i] != refBlkDesc.inner_blks[i]) + for (int i = 0; i < actualBlkDesc.inner_nblks; ++i) { + if (actualBlkDesc.inner_blks[i] != refBlkDesc.inner_blks[i]) { return false; + } + } - for (int i = 0; i < actualBlkDesc.inner_nblks; ++i) - if (actualBlkDesc.inner_idxs[i] != refBlkDesc.inner_idxs[i]) + for (int i = 0; i < actualBlkDesc.inner_nblks; ++i) { + if (actualBlkDesc.inner_idxs[i] != refBlkDesc.inner_idxs[i]) { return false; + } + } auto actualStrides = desc.get()->format_desc.blocking.strides; auto refStrides = 
refDesc.get()->format_desc.blocking.strides; @@ -605,8 +623,9 @@ bool DnnlBlockedMemoryDesc::blocksExtended() const { const auto padded_dims = desc.get_padded_dims(); const auto dims = desc.get_dims(); for (int i = 0; i < desc.get_ndims(); i++) { - if (dims[i] != padded_dims[i]) + if (dims[i] != padded_dims[i]) { return true; + } } return false; } @@ -691,8 +710,9 @@ MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithNewPrecision(const ov::element::Ty void DnnlBlockedMemoryDesc::recomputeDefaultStrides() { const auto& rank = getShape().getRank(); - if (order.size() != blockedDims.size()) + if (order.size() != blockedDims.size()) { OPENVINO_THROW("Can't recompute stride: order size != blocked dims size"); + } auto& oneDnnStrides = desc.get()->format_desc.blocking.strides; if (getShape().hasZeroDims()) { @@ -716,12 +736,14 @@ void DnnlBlockedMemoryDesc::recomputeDefaultStrides() { DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const dnnl::memory::desc& mdesc, const Shape& shape) : MemoryDesc(shape, DnnlBlocked) { - if (mdesc.get_format_kind() == dnnl::memory::format_kind::any) + if (mdesc.get_format_kind() == dnnl::memory::format_kind::any) { OPENVINO_THROW("Unexpected: Memory format any is prohibited!"); + } dnnl::impl::memory_desc_wrapper descWrapped(mdesc.get()); - if (!descWrapped.is_blocking_desc()) + if (!descWrapped.is_blocking_desc()) { OPENVINO_THROW("Unexpected: Can't create DnnlBlockedMemoryDesc from not blocking desc"); + } if (!shape.isCompatible(DnnlExtensionUtils::convertToVectorDims(mdesc.get_dims()))) { OPENVINO_THROW("ParameterMismatch: Can not create DnnlBlockedMemoryDesc. 
memory::desc dims: ", diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp index 46e9eacf52affc..2e8067041bdf92 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp @@ -18,8 +18,9 @@ DnnlMemoryDesc::DnnlMemoryDesc(const dnnl::memory::desc& desc) : DnnlMemoryDesc( DnnlMemoryDesc::DnnlMemoryDesc(const_dnnl_memory_desc_t cdesc) : MemoryDesc(Shape(DnnlExtensionUtils::convertToVectorDims(cdesc->dims, cdesc->ndims)), Dnnl), desc(DnnlExtensionUtils::clone_desc(cdesc)) { - if (getFormatKind() == dnnl::memory::format_kind::any) + if (getFormatKind() == dnnl::memory::format_kind::any) { OPENVINO_THROW("Unexpected: Memory format any is prohibited!"); + } } ov::element::Type DnnlMemoryDesc::getPrecision() const { @@ -99,8 +100,9 @@ bool DnnlMemoryDesc::hasEmptyExtraData() const { } bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const { - if (!getShape().hasZeroDims()) + if (!getShape().hasZeroDims()) { return false; + } dnnl::impl::memory_desc_wrapper wrapped(desc.get()); return getShape().hasZeroDims() && wrapped.offset0() != DNNL_RUNTIME_DIM_VAL; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 13250bfabd2e10..5d4538fc669bbf 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -128,11 +128,13 @@ Node::Node(const std::shared_ptr& op, GraphContext::CPtr ctx, const Sh std::istringstream stream(primitivesPriority); std::string str; while (getline(stream, str, ',')) { - if (str.substr(0, 4) != "cpu:") + if (str.substr(0, 4) != "cpu:") { continue; + } customImplPriorities.push_back(parse_impl_name(str)); - if (customImplPriorities.back() == impl_desc_type::unknown && str != "cpu:unknown") + if (customImplPriorities.back() == impl_desc_type::unknown && str != "cpu:unknown") { OPENVINO_THROW("Unsupported CPU implementation ", str, " for 
node ", getName()); + } } const auto& defaultImplPriorities = getDefaultImplPriority(); customImplPriorities.insert(customImplPriorities.end(), @@ -145,8 +147,9 @@ Node::Node(const std::shared_ptr& op, GraphContext::CPtr ctx, const Sh std::istringstream stream(inputMemoryFormats); std::string str; while (getline(stream, str, ',')) { - if (str.substr(0, 4) != "cpu:") + if (str.substr(0, 4) != "cpu:") { continue; + } inputMemoryFormatsFilter.push_back(dnnl::utils::str2fmt(str.substr(4, str.size()).c_str())); } } @@ -156,8 +159,9 @@ Node::Node(const std::shared_ptr& op, GraphContext::CPtr ctx, const Sh std::istringstream stream(outputMemoryFormats); std::string str; while (getline(stream, str, ',')) { - if (str.substr(0, 4) != "cpu:") + if (str.substr(0, 4) != "cpu:") { continue; + } outputMemoryFormatsFilter.push_back(dnnl::utils::str2fmt(str.substr(4, str.size()).c_str())); } } @@ -166,8 +170,9 @@ Node::Node(const std::shared_ptr& op, GraphContext::CPtr ctx, const Sh if (it != rtInfo.end()) { enforceBF16evenForGraphTail = it->second.as(); } - if (ov::fp16_compression_is_disabled(op)) + if (ov::fp16_compression_is_disabled(op)) { keepOriginalPrecision = true; + } } Node::Node(const std::string& type, @@ -207,8 +212,9 @@ void Node::remove() { auto drop = [](const std::vector& edges) { for (auto& edge : edges) { auto edgePtr = edge.lock(); - if (!edgePtr) + if (!edgePtr) { continue; + } edgePtr->getParent()->removeChildEdge(edgePtr); edgePtr->getChild()->removeParentEdge(edgePtr); } @@ -220,8 +226,9 @@ void Node::remove() { bool Node::isEdgesEmpty(const std::vector& edges) const { for (auto& edge : edges) { - if (edge.lock()) + if (edge.lock()) { return false; + } } return true; } @@ -472,14 +479,16 @@ bool Node::canBeInPlace() const { } if (getParentEdges().size() != 1 || getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1 || - (getParentEdgeAt(0)->getParent()->isConstant() && !getParentEdgeAt(0)->getChild()->isConstant())) + 
(getParentEdgeAt(0)->getParent()->isConstant() && !getParentEdgeAt(0)->getChild()->isConstant())) { return false; + } // TODO: we need to extend this logic to properly handle all possible inplace conflicts if (getParentEdges().size() == 1 && getParentEdgeAt(0)->getParent()->getType() == Type::Reshape) { auto reshapeNode = getParentEdgeAt(0)->getParent(); - if (reshapeNode->getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1) + if (reshapeNode->getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1) { return false; + } } auto inShape = getInputShapeAtPort(0); @@ -493,14 +502,16 @@ bool Node::canBeInPlace() const { void Node::resolveInPlaceEdges(Edge::LOOK look) { const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (!selected_pd) + if (!selected_pd) { OPENVINO_THROW("Cannot find selected primitive descriptor for node: ", getName()); + } if (look & Edge::LOOK_DOWN) { for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); - if (inplaceOutIndx < 0) + if (inplaceOutIndx < 0) { continue; + } auto parentEdge = getParentEdgeAt(i); OPENVINO_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated, @@ -527,8 +538,9 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); - if (inplaceInpIndx < 0) + if (inplaceInpIndx < 0) { continue; + } auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); auto memBlock = std::make_shared(baseMemBlock); @@ -594,8 +606,9 @@ std::string Node::getPrimitiveDescriptorType() const { std::string str_type; auto add_type = [&](const std::string& t) { - if (!str_type.empty() && t.c_str()[0] != '_') + if (!str_type.empty() && t.c_str()[0] != '_') { str_type += "_"; + } str_type += t; }; @@ -630,10 +643,11 @@ 
std::string Node::getPrimitiveDescriptorType() const { #undef SEARCH_TYPE - if (type == impl_desc_type::unknown) + if (type == impl_desc_type::unknown) { str_type = "unknown"; - else if (str_type.empty()) + } else if (str_type.empty()) { str_type = "undef"; + } // adding layer precision to the performance counters as one of the token // currently we treat a layer executing in int8 mode if its input is I8 or U8. if input is U8, we still @@ -644,7 +658,7 @@ std::string Node::getPrimitiveDescriptorType() const { if (selectedPrimitiveDesc->getConfig().inConfs[0].getMemDesc()->getPrecision() != ov::element::u8) { str_type += "_" + - std::string( + static_cast( selectedPrimitiveDesc->getConfig().inConfs[0].getMemDesc()->getPrecision().get_type_name()); } else { str_type += "_I8"; @@ -653,7 +667,7 @@ std::string Node::getPrimitiveDescriptorType() const { if (selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision() != ov::element::u8) { str_type += "_" + - std::string( + static_cast( selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision().get_type_name()); } else { str_type += "_I8"; @@ -665,58 +679,67 @@ std::string Node::getPrimitiveDescriptorType() const { } EdgePtr Node::getParentEdgeAt(size_t idx) const { - if (idx >= parentEdges.size()) + if (idx >= parentEdges.size()) { OPENVINO_THROW("Node ", getName(), " contains less parent edges than ", idx); + } auto parentEdgePtr = parentEdges[idx].lock(); - if (!parentEdgePtr) + if (!parentEdgePtr) { OPENVINO_THROW("Node ", getName(), " contains empty parent edge for index ", idx); + } return parentEdgePtr; } EdgePtr Node::getChildEdgeAt(size_t idx) const { - if (idx >= childEdges.size()) + if (idx >= childEdges.size()) { OPENVINO_THROW("Node ", getName(), " contains less child edges than ", idx); + } auto childEdgePtr = childEdges[idx].lock(); - if (!childEdgePtr) + if (!childEdgePtr) { OPENVINO_THROW("Node ", getName(), " contains empty child edge for index ", idx); + } return 
childEdgePtr; } std::vector Node::getChildEdgesAtPort(int inputNum) const { - if (inputNum < 0) + if (inputNum < 0) { OPENVINO_THROW("Node ", getName(), ". negative input number is not supported ", inputNum); + } - if (static_cast(inputNum) >= outputShapes.size()) + if (static_cast(inputNum) >= outputShapes.size()) { OPENVINO_THROW("Node ", getName(), " contains less output ports than ", inputNum); + } std::vector res; for (auto& edge_w : childEdges) { auto edge = edge_w.lock(); - if (!edge) + if (!edge) { OPENVINO_THROW("Node ", getName(), " contains dead weak ptr"); - if (edge->getInputNum() == inputNum) + } + if (edge->getInputNum() == inputNum) { res.emplace_back(std::move(edge)); + } } return res; } std::vector Node::getAvailableFormatsForDims(const Shape& dims) const { - if (dims.getRank() == 0) + if (dims.getRank() == 0) { return {memory::format_tag::x}; - else if (dims.getRank() == 1) + } else if (dims.getRank() == 1) { return {memory::format_tag::x}; - else if (dims.getRank() == 2) + } else if (dims.getRank() == 2) { return {memory::format_tag::nc}; - else if (dims.getRank() == 3) + } else if (dims.getRank() == 3) { return {memory::format_tag::tnc, memory::format_tag::ntc, memory::format_tag::ncw, memory::format_tag::nCw8c, memory::format_tag::nCw16c}; - else if (dims.getRank() == 4) + } else if (dims.getRank() == 4) { return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c}; - else if (dims.getRank() == 5) + } else if (dims.getRank() == 5) { return {memory::format_tag::ncdhw, memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c}; + } return {memory::format_tag::any}; } @@ -745,8 +768,9 @@ void Node::updateShapes() { } } else { // guard check for internal dynamic nodes to avoid possible overestimation of the required memory size - if (shapeInference && FULL_PORT_MASK == shapeInference->get_port_mask()) + if (shapeInference && FULL_PORT_MASK == shapeInference->get_port_mask()) { return; + } for (auto&& edge : 
getChildEdges()) { auto edge_ptr = edge.lock(); @@ -864,8 +888,9 @@ void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_ } void Node::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto addSupportedPrimitiveDescriptor = [&](const dnnl::primitive_desc& prim_desc) { std::vector inConfs, outConfs; @@ -932,14 +957,16 @@ void Node::initSupportedPrimitiveDescriptors() { // fallback. if none of the primitive types is present in the priority list just add first implementation // @todo this fallback is not necessary if primitive priority list is filled correctly - if (supportedPrimitiveDescriptors.empty()) + if (supportedPrimitiveDescriptors.empty()) { addSupportedPrimitiveDescriptor(first_desc); + } } } void Node::filterSupportedPrimitiveDescriptors() { - if (inputMemoryFormatsFilter.empty() && outputMemoryFormatsFilter.empty()) + if (inputMemoryFormatsFilter.empty() && outputMemoryFormatsFilter.empty()) { return; + } // Compare by format tag auto areCompatible = [](const MemoryDesc& desc, dnnl::memory::format_tag fmt) -> bool { @@ -951,8 +978,9 @@ void Node::filterSupportedPrimitiveDescriptors() { auto isNotSuitableDesc = [&](const NodeDesc& desc) { const auto& config = desc.getConfig(); if (inputMemoryFormatsFilter.size() > config.inConfs.size() || - outputMemoryFormatsFilter.size() > config.outConfs.size()) + outputMemoryFormatsFilter.size() > config.outConfs.size()) { OPENVINO_THROW("Incorrect number of input or output memory formats"); + } for (size_t i = 0; i < inputMemoryFormatsFilter.size(); i++) { if (!areCompatible(*config.inConfs[i].getMemDesc(), inputMemoryFormatsFilter[i])) { @@ -1000,17 +1028,20 @@ void Node::initDescriptor(const NodeConfig& config) { if (descs.empty()) { const auto& selectedConfig = selectedPD->getConfig(); if (selectedConfig.inConfs.size() != config.inConfs.size() || - selectedConfig.outConfs.size() != 
config.outConfs.size()) + selectedConfig.outConfs.size() != config.outConfs.size()) { return; + } for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (!selectedConfig.inConfs[i].getPortDesc()->isCompatible(*config.inConfs[i].getPortDesc())) + if (!selectedConfig.inConfs[i].getPortDesc()->isCompatible(*config.inConfs[i].getPortDesc())) { OPENVINO_THROW("Incorrect descriptor for node: ", getName(), " on ", i, " intput port"); + } } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (!selectedConfig.outConfs[i].getPortDesc()->isCompatible(*config.outConfs[i].getPortDesc())) + if (!selectedConfig.outConfs[i].getPortDesc()->isCompatible(*config.outConfs[i].getPortDesc())) { OPENVINO_THROW("Incorrect descriptor for node: ", getName(), " on ", i, " output port"); + } } selectedPD->setConfig(config); @@ -1039,12 +1070,14 @@ void Node::initDescriptor(const NodeConfig& config) { std::vector inDescs; inDescs.reserve(config.inConfs.size()); - for (const auto& inConf : config.inConfs) + for (const auto& inConf : config.inConfs) { inDescs.emplace_back(inConf.getMemDesc()); + } std::vector outDescs; outDescs.reserve(config.outConfs.size()); - for (const auto& outConf : config.outConfs) + for (const auto& outConf : config.outConfs) { outDescs.emplace_back(outConf.getMemDesc()); + } createDescriptor(inDescs, outDescs); for (auto& desc : descs) { @@ -1114,18 +1147,21 @@ void Node::prepareMemory(const std::vector& intDescs) { void Node::prepareMemory(dnnl::primitive_desc_iterator& itpd) { std::vector intDescs; intDescs.reserve(internalBlobDesc.size()); - for (auto& it : internalBlobDesc) + for (auto& it : internalBlobDesc) { intDescs.push_back(it(itpd, 0)); + } Node::prepareMemory(intDescs); } MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryDescPtr srcWeightDesc) { - if (!getParentEdgeAt(1)->getParent()->isConstant()) + if (!getParentEdgeAt(1)->getParent()->isConstant()) { OPENVINO_THROW("Weight input is not const for node 
", getName(), "."); + } auto edgeMem = getSrcMemoryAtPort(1); - if (!edgeMem) + if (!edgeMem) { OPENVINO_THROW("Cannot get const weights edgeMem for node ", getName(), "."); + } if (!srcWeightDesc) { auto constDnnlMemOutDesc = edgeMem->getDescWithType(); @@ -1166,15 +1202,17 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryD } void Node::toNumaNode(int numaNodeID) { - if (numaNodeID < 0) + if (numaNodeID < 0) { return; + } return toNumaNodeImpl(numaNodeID); } void Node::toNumaNodeImpl(int numaNodeID) { - if (curNumaNode == numaNodeID) + if (curNumaNode == numaNodeID) { return; + } // create scratch pad from specified numa node if (scratchpadMem) { @@ -1183,10 +1221,12 @@ void Node::toNumaNodeImpl(int numaNodeID) { } // mbind constant prim args to numa nodes - if (primArgs.count(DNNL_ARG_WEIGHTS)) + if (primArgs.count(DNNL_ARG_WEIGHTS)) { mbind_move(primArgs[DNNL_ARG_WEIGHTS], numaNodeID); - if (primArgs.count(DNNL_ARG_BIAS)) + } + if (primArgs.count(DNNL_ARG_BIAS)) { mbind_move(primArgs[DNNL_ARG_BIAS], numaNodeID); + } curNumaNode = numaNodeID; } @@ -1194,8 +1234,9 @@ void Node::toNumaNodeImpl(int numaNodeID) { bool Node::isInPlace() const { if (inplace == InPlaceType::Unknown) { auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } inplace = InPlaceType::NoInPlace; auto config = selected_pd->getConfig(); @@ -1225,8 +1266,9 @@ bool Node::isConstant() { } void Node::updateConstantType() { - if (constant == ConstantType::StrictNoConst) + if (constant == ConstantType::StrictNoConst) { return; + } bool isConst = true; for (const auto& parentEdge : getParentEdges()) { @@ -1235,8 +1277,9 @@ void Node::updateConstantType() { const auto prevConstantType = constant; constant = isConst ? 
ConstantType::Const : ConstantType::NoConst; - if (constant == prevConstantType) + if (constant == prevConstantType) { return; // state has not changed, no reason to continue + } for (const auto& childEdge : getChildEdges()) { const auto childNode = childEdge.lock()->getChild(); @@ -1245,8 +1288,9 @@ void Node::updateConstantType() { } void Node::addOriginalLayer(const std::string& layerName) { - if (layerName.empty()) + if (layerName.empty()) { return; + } if (originalLayers.empty()) { originalLayers = layerName; } else { @@ -1294,8 +1338,9 @@ const std::vector& Node::getDefaultImplPriority() { } const std::vector& Node::getImplPriority() { - if (!customImplPriorities.empty()) + if (!customImplPriorities.empty()) { return customImplPriorities; + } return getDefaultImplPriority(); } @@ -1319,9 +1364,10 @@ PortDescBasePtr Node::getConsistentInputDesc(const NodeConfig& config, size_t id } auto* parentSelectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor(); - if (!parentSelectedPD) + if (!parentSelectedPD) { OPENVINO_THROW("Cannot get selected primitive descriptor for node: ", getParentEdgeAt(idx)->getParent()->getName()); + } int num = getParentEdgeAt(idx)->getInputNum(); if (num >= 0) { @@ -1329,8 +1375,9 @@ PortDescBasePtr Node::getConsistentInputDesc(const NodeConfig& config, size_t id const auto desc = parentConf.getMemDesc()->cloneWithNewPrecision(inConf.getMemDesc()->getPrecision()); parentConf.setMemDesc(desc); - if (!parentConf.getMemDesc()->isDefined() && parentConf.inPlace() >= 0) + if (!parentConf.getMemDesc()->isDefined() && parentConf.inPlace() >= 0) { getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor(); + } // config might be changed parentConf = parentSelectedPD->getConfig().outConfs[num]; @@ -1361,9 +1408,10 @@ PortDescBasePtr Node::getConsistentOutputDesc(const NodeConfig& config, size_t i } auto* childSelectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor(); - if (!childSelectedPD) + if 
(!childSelectedPD) { OPENVINO_THROW("Cannot get selected primitive descriptor for node: ", getChildEdgeAt(idx)->getChild()->getName()); + } int num = getChildEdgeAt(idx)->getOutputNum(); if (num >= 0) { @@ -1371,8 +1419,9 @@ PortDescBasePtr Node::getConsistentOutputDesc(const NodeConfig& config, size_t i const auto desc = childConf.getMemDesc()->cloneWithNewPrecision(outConf.getMemDesc()->getPrecision()); childConf.setMemDesc(desc); - if (!childConf.getMemDesc()->isDefined() && childConf.inPlace() >= 0) + if (!childConf.getMemDesc()->isDefined() && childConf.inPlace() >= 0) { getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor(); + } // config might be changed childConf = childSelectedPD->getConfig().inConfs[num]; @@ -1385,12 +1434,14 @@ PortDescBasePtr Node::getConsistentOutputDesc(const NodeConfig& config, size_t i } void Node::initOptimalPrimitiveDescriptor() { - if (one_of(getType(), Type::RNNCell, Type::RNNSeq)) // can be skipped for RNN node + if (one_of(getType(), Type::RNNCell, Type::RNNSeq)) { // can be skipped for RNN node return; + } auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set for ", getName()); + } auto config = selected_pd->getConfig(); for (size_t i = 0; i < config.inConfs.size(); i++) { @@ -1424,8 +1475,9 @@ void Node::initOptimalPrimitiveDescriptor() { bool Node::isConfigDefined(const NodeConfig& config) const { for (const auto& configs : {config.inConfs, config.outConfs}) { for (const auto& dc : configs) { - if (!dc.getMemDesc()->isDefined()) + if (!dc.getMemDesc()->isDefined()) { return false; + } } } return true; @@ -1455,8 +1507,9 @@ void Node::appendPostOpArgs(const dnnl::primitive_attr& attr, bool Node::isFusedWith(Type fusedNodeType) const { for (const auto& fusedNode : fusedWith) { - if (fusedNode->type == fusedNodeType) + if (fusedNode->type == fusedNodeType) { return true; + } } return false; @@ 
-1558,8 +1611,9 @@ Node* Node::NodesFactory::create(const std::shared_ptr& op, const Grap if (newNode == nullptr) { try { std::unique_ptr ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, context)); - if (ol != nullptr && ol->created()) + if (ol != nullptr && ol->created()) { newNode = ol.release(); + } } catch (const ov::Exception& ex) { if (dynamic_cast(&ex) != nullptr) { errorMessage += getExceptionDescWithoutStatus(ex); @@ -1572,13 +1626,15 @@ Node* Node::NodesFactory::create(const std::shared_ptr& op, const Grap if (newNode == nullptr) { try { std::unique_ptr ol(new Reference(op, context, errorMessage)); - if (ol != nullptr && ol->created()) + if (ol != nullptr && ol->created()) { newNode = ol.release(); + } } catch (const ov::Exception& ex) { if (dynamic_cast(&ex) != nullptr) { const auto currErrorMess = getExceptionDescWithoutStatus(ex); - if (!currErrorMess.empty()) + if (!currErrorMess.empty()) { errorMessage += errorMessage.empty() ? currErrorMess : "\n" + currErrorMess; + } } else { throw; } @@ -1624,12 +1680,14 @@ bool Node::canBePerformedAsScaleShift(const Node* parentNode) const { const auto isBroadcastableToDataInput = [&]() { auto& dataShape = getInputShapeAtPort(fusingPort).getDims(); for (size_t i = 0; i < getParentEdges().size(); i++) { - if (i == fusingPort) + if (i == fusingPort) { continue; + } auto& weightShape = getInputShapeAtPort(i).getDims(); if (getParentEdgeAt(i)->getParent()->getChildEdges().size() != 1 || - !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true)) + !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true)) { return false; + } } return true; }; @@ -1765,16 +1823,18 @@ bool Node::isOutputTensorAtPortEmpty(size_t port) const { bool Node::hasEmptyInputTensors() const { for (size_t i = 0; i < getParentEdges().size(); i++) { - if (isInputTensorAtPortEmpty(i)) + if (isInputTensorAtPortEmpty(i)) { return true; + } } return false; } bool 
Node::hasEmptyOutputTensors() const { for (size_t i = 0; i < outputShapes.size(); i++) { - if (isOutputTensorAtPortEmpty(i)) + if (isOutputTensorAtPortEmpty(i)) { return true; + } } return false; } @@ -1807,14 +1867,16 @@ bool Node::needPrepareParams() const { bool Node::inputShapesModified() const { if (lastInputDims.size() != getParentEdges().size()) { - if (lastInputDims.empty()) + if (lastInputDims.empty()) { return true; + } OPENVINO_THROW("Input dims and parent edges number mismatch!"); } for (size_t i = 0; i < lastInputDims.size(); i++) { - if (lastInputDims[i] != getParentEdgeAt(i)->getMemory().getStaticDims()) + if (lastInputDims[i] != getParentEdgeAt(i)->getMemory().getStaticDims()) { return true; + } } return false; } @@ -1829,8 +1891,9 @@ std::vector Node::shapeInferGeneric(const std::vector& shapes auto input_value_port_mask = shapeInference->get_port_mask(); input_shapes.reserve(shapes.size()); - for (size_t i = 0; i < shapes.size(); i++) + for (size_t i = 0; i < shapes.size(); i++) { input_shapes.emplace_back(std::ref(shapes[i].getStaticDims())); + } std::unordered_map input_values; if (input_value_port_mask) { @@ -1857,8 +1920,9 @@ IShapeInfer::Result Node::shapeInfer() const { auto input_value_port_mask = shapeInference->get_port_mask(); input_shapes.reserve(inputShapes.size()); - for (size_t port = 0; port < inputShapes.size(); ++port) + for (size_t port = 0; port < inputShapes.size(); ++port) { input_shapes.emplace_back(std::ref(getParentEdgeAt(port)->getMemory().getStaticDims())); + } std::unordered_map input_values; if (input_value_port_mask) { @@ -1874,13 +1938,15 @@ IShapeInfer::Result Node::shapeInfer() const { void Node::updateLastInputDims() { if (lastInputDims.size() != getParentEdges().size()) { - if (!lastInputDims.empty()) + if (!lastInputDims.empty()) { OPENVINO_THROW("Input dims and parent edges number mismatch!"); + } lastInputDims.resize(getParentEdges().size()); } - for (size_t i = 0; i < lastInputDims.size(); i++) + for (size_t i 
= 0; i < lastInputDims.size(); i++) { lastInputDims[i] = getParentEdgeAt(i)->getMemory().getDesc().getShape().getDims(); + } } bool Node::canFuseSimpleOperation(const NodePtr& node) const { @@ -1911,8 +1977,9 @@ void Node::addSupportedPrimDesc(const std::vector& inPortConfi // In order to simplify particular node initialization logic we just don't add config in case target shape is // not supported by blockedDescCreator. This should be suitable for major of scenarios since almost all nodes // add `ncsp` blockedDescCreator which supports any shape rank. - if (shape.getRank() < portConfigurator.blockedDescCreator->getMinimalRank()) + if (shape.getRank() < portConfigurator.blockedDescCreator->getMinimalRank()) { return false; + } PortConfig portConfig; portConfig.inPlace(portConfigurator.inPlace); @@ -1929,24 +1996,27 @@ void Node::addSupportedPrimDesc(const std::vector& inPortConfi auto shape = inPortConfigs[i].shape.getRank() == 0 ? getInputShapeAtPort(i) : inPortConfigs[i].shape; auto prc = inPortConfigs[i].prc == ov::element::undefined ? getOriginalInputPrecisionAtPort(i) : inPortConfigs[i].prc; - if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs)) + if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs)) { return; + } } for (size_t i = 0; i < outPortConfigs.size(); i++) { auto dims = outPortConfigs[i].shape.getRank() == 0 ? getOutputShapeAtPort(i) : outPortConfigs[i].shape; auto prc = outPortConfigs[i].prc == ov::element::undefined ? 
getOriginalOutputPrecisionAtPort(i) : outPortConfigs[i].prc; - if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs)) + if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs)) { return; + } } supportedPrimitiveDescriptors.emplace_back(config, implType); } void Node::fuseDQScales(const float* scaleData, const size_t scaleSize) { - if (DQScales.empty()) + if (DQScales.empty()) { DQScales.resize(scaleSize, 1.0); + } OPENVINO_ASSERT(scaleSize == 1 || DQScales.size() == 1 || DQScales.size() == scaleSize, "set invalid scales size , DQScales vector size: ", DQScales.size(), @@ -1954,8 +2024,9 @@ void Node::fuseDQScales(const float* scaleData, const size_t scaleSize) { scaleSize, "Node: ##", getName()); - if (scaleSize > DQScales.size()) + if (scaleSize > DQScales.size()) { DQScales.resize(scaleSize, DQScales[0]); + } if (1 == scaleSize) { std::transform(DQScales.begin(), DQScales.end(), DQScales.begin(), [=](float val) { return (scaleData[0] * val); @@ -1967,8 +2038,9 @@ void Node::fuseDQScales(const float* scaleData, const size_t scaleSize) { } if (std::all_of(DQScales.begin(), DQScales.end(), [OV_CAPTURE_CPY_AND_THIS](float val) { return (val == DQScales[0]); - })) + })) { DQScales.resize(1); + } } int Node::inPlaceInputPort(int portIdx) const { @@ -1978,8 +2050,9 @@ int Node::inPlaceInputPort(int portIdx) const { } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (!selected_pd) + if (!selected_pd) { OPENVINO_THROW("Cannot find selected primitive descriptor for node: ", getName()); + } const auto& conf = selected_pd->getConfig(); @@ -2000,8 +2073,9 @@ int Node::inPlaceOutPort(int portIdx) const { } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (!selected_pd) + if (!selected_pd) { OPENVINO_THROW("Cannot find selected primitive descriptor for node: ", getName()); + } const auto& conf = selected_pd->getConfig(); @@ -2084,8 +2158,9 @@ void Node::resolveInPlaceDirection() { auto downstreamPeers = [&] { for (auto& 
peerEdge : pParent->getChildEdgesAtPort(pEdge->getInputNum())) { auto peerNode = peerEdge->getChild().get(); - if (peerNode == this) + if (peerNode == this) { continue; + } if (inPlaceDirection(peerNode, PortType::INPUT, peerEdge->getOutputNum()) == InplaceDirectionType::DOWN) { return true; @@ -2148,8 +2223,9 @@ void Node::resolveInPlaceDirection() { // note: there are only non-inplace or cyclic-inplace descendants at the moment. std::function searchReferencingOutput; searchReferencingOutput = [&](const Node* node, int portIdx) -> void { - if (numConflicts > 1) + if (numConflicts > 1) { return; // early stop + } auto childEdges = node->getChildEdgesAtPort(portIdx); for (auto& edge : childEdges) { auto pChild = edge->getChild().get(); @@ -2172,8 +2248,9 @@ void Node::resolveInPlaceDirection() { // note: the parent node does not use inPlace memory at the moment, let's check the siblings for (auto& peerEdge : pParent->getChildEdgesAtPort(pEdge->getInputNum())) { auto peerNode = peerEdge->getChild().get(); - if (peerNode == this) + if (peerNode == this) { continue; + } if (Type::Output == peerNode->getType()) { numConflicts++; } else { diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp index 274259a4e279ef..0485642bf89e87 100644 --- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp @@ -67,10 +67,12 @@ AdaptivePooling::AdaptivePooling(const std::shared_ptr& op, const Grap } void AdaptivePooling::getSupportedDescriptors() { - if (getParentEdges().size() != 2) + if (getParentEdges().size() != 2) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if (getChildEdges().size() < (algorithm == Algorithm::AdaptivePoolingMax ? 2 : 1)) + } + if (getChildEdges().size() < (algorithm == Algorithm::AdaptivePoolingMax ? 
2 : 1)) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } auto srcRank = getInputShapeAtPort(0).getRank(); if (!one_of(spatialDimsCount, 1, 2, 3)) { @@ -100,8 +102,9 @@ bool AdaptivePooling::needShapeInfer() const { } void AdaptivePooling::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } // we supports only fp32 currently precision = ov::element::f32; @@ -133,8 +136,9 @@ void AdaptivePooling::executeDynamicImpl(const dnnl::stream& strm) { void AdaptivePooling::execute(const dnnl::stream& strm) { auto inputPrec = getParentEdgeAt(0)->getMemory().getDataType(); auto outputPrec = getChildEdgeAt(0)->getMemory().getDataType(); - if (!(inputPrec == dnnl_f32 && outputPrec == dnnl_f32)) + if (!(inputPrec == dnnl_f32 && outputPrec == dnnl_f32)) { THROW_CPU_NODE_ERR("doesn't support demanded precisions"); + } auto& srcMemory0 = getParentEdgeAt(0)->getMemory(); auto& srcMemory1 = getParentEdgeAt(1)->getMemory(); @@ -156,12 +160,13 @@ void AdaptivePooling::execute(const dnnl::stream& strm) { const auto* srcPooledSpatialShapes = getSrcDataAtPortAs(1); auto* dst = getDstDataAtPortAs(0); - if (static_cast(srcMemory1.getShape().getElementsCount()) != spatialDimsCount) + if (static_cast(srcMemory1.getShape().getElementsCount()) != spatialDimsCount) { THROW_CPU_NODE_ERR("has input spatial dimension (", srcMemory1.getShape().getElementsCount(), ") inconsistent with pooling vector size (", spatialDimsCount, ")"); + } auto inputDimVector = srcMemory0.getStaticDims(); const int N = static_cast(inputDimVector[0]); @@ -181,8 +186,9 @@ void AdaptivePooling::execute(const dnnl::stream& strm) { blockSize * (isBlkFmt ? srcBlockDesc->getBlockDims()[1] : srcMemory0.getShape().getStaticDims()[1]); const int blockCount = (isTailCFmt ? 
1 : chPadding / blockSize); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { THROW_CPU_NODE_ERR("doesn't have primitive descriptors."); + } auto config = selectedPrimitiveDescriptor->getConfig(); auto srcStrides = srcBlockDesc->getStrides(); auto dstStrides = getChildEdgeAt(0)->getMemory().getDescWithType()->getStrides(); @@ -227,8 +233,9 @@ void AdaptivePooling::execute(const dnnl::stream& strm) { setBinBorders(&hStart, &hEnd, oh, IH, OH); setBinBorders(&wStart, &wEnd, ow, IW, OW); auto binSize = (dEnd - dStart) * (hEnd - hStart) * (wEnd - wStart); - if (binSize == 0) + if (binSize == 0) { THROW_CPU_NODE_ERR("has empty bin"); + } float sum = 0; for (size_t pixD = dStart; pixD < dEnd; pixD++) { for (size_t pixH = hStart; pixH < hEnd; pixH++) { diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp index deffe60668de01..35a2d829bbc7fe 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp @@ -37,26 +37,31 @@ BatchToSpace::BatchToSpace(const std::shared_ptr& op, const GraphConte OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 4 || outputShapes.size() != 1) + if (inputShapes.size() != 4 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input or output edges!"); + } const auto& inDims = getInputShapeAtPort(0).getDims(); const auto& outDims = getOutputShapeAtPort(0).getDims(); - if (inDims.size() < 4 || inDims.size() > 5) + if (inDims.size() < 4 || inDims.size() > 5) { THROW_CPU_NODE_ERR("has unsupported 'data' input rank: ", inDims.size()); - if (inDims.size() != outDims.size()) + } + if (inDims.size() != outDims.size()) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions"); + } } void BatchToSpace::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) 
+ if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& inDims = getInputShapeAtPort(0).getDims(); const auto precision = getOriginalInputPrecisionAtPort(0); const std::set supported_precision_sizes = {1, 2, 4, 8}; - if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) + if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) { THROW_CPU_NODE_ERR("has unsupported precision: ", precision.get_type_name()); + } addSupportedPrimDesc({{LayoutType::nspc, precision}, {LayoutType::ncsp, ov::element::i32}, diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp index dab539575723f7..722719e687c1e6 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp @@ -120,10 +120,11 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ if (jcp_.with_dw_conv) { add(reg_output_base, jcp_.oc_block * jcp_dw_conv_.kh * jcp_.ow * jcp_.typesize_out); } else { - if (jcp_.with_binarization) + if (jcp_.with_binarization) { add(reg_output_base, div_up(jcp_.oc_block, nbits) * jcp_.typesize_out); - else + } else { add(reg_output_base, jcp_.oc_block * jcp_.typesize_out); + } } add(reg_oc_off, jcp_.oc_block * sizeof(float)); @@ -133,8 +134,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ L(tail_label); - if (jcp_.oc % jcp_.oc_block != 0) + if (jcp_.oc % jcp_.oc_block != 0) { solve_common(1, jcp_.oc % jcp_.oc_block); + } L(exit_label); @@ -142,8 +144,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ prepare_table(); - for (auto& inj : eltwise_injectors) + for (auto& inj : eltwise_injectors) { inj->prepare_table(); + } } private: @@ -254,8 +257,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ assert(!"unsupported data type"); } - if (type_in != data_type::f32) + if (type_in != 
data_type::f32) { uni_vcvtdq2ps(vmm_in, vmm_in); + } } void store_dst(const Xbyak::Address& op, Vmm vmm_dst, bool scalar_store) { @@ -275,8 +279,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ case memory::data_type::s8: uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41 && !scalar_store) + if (isa != x64::sse41 && !scalar_store) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst); @@ -284,18 +289,20 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ movq(reg_tmp_64, xmm_dst); mov(op, reg_tmp_8); } else { - if (isa != x64::sse41) + if (isa != x64::sse41) { vmovq(op, xmm_dst); - else + } else { movd(op, xmm_dst); + } } break; case memory::data_type::u8: case memory::data_type::bin: uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41 && !scalar_store) + if (isa != x64::sse41 && !scalar_store) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst); @@ -303,10 +310,11 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ movq(reg_tmp_64, xmm_dst); mov(op, reg_tmp_8); } else { - if (isa != x64::sse41) + if (isa != x64::sse41) { vmovq(op, xmm_dst); - else + } else { movd(op, xmm_dst); + } } break; @@ -363,8 +371,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ uni_vmovups(vmm_tmp, ptr[aux1_reg_kernel + ker_off]); uni_vpxor(vmm_tmp, vmm_tmp, vmm_src); - if (jcp_.ic_padded != jcp_.ic && last_icb && ifm2 == (ic_blocks - 1)) + if (jcp_.ic_padded != jcp_.ic && last_icb && ifm2 == (ic_blocks - 1)) { uni_vandps(vmm_tmp, vmm_tmp, ptr[reg_table + 7 * vlen]); + } if (mayiuse(x64::avx512_vpopcnt)) { vpopcntd(vmm_tmp, vmm_tmp); @@ -524,12 +533,15 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ int nbits = 8; int repeats = isa == x64::sse41 && oc_step > (jcp_.oc_block / 2) ? 
2 : 1; - for (int r = 0; r < repeats; r++) - for (int ii = 0; ii < oc_blocks; ii++) - for (int jj = 0; jj < ur_w; jj++) + for (int r = 0; r < repeats; r++) { + for (int ii = 0; ii < oc_blocks; ii++) { + for (int jj = 0; jj < ur_w; jj++) { uni_vpxor(Vmm(1 + r * jcp_.ur_w * jcp_.nb_oc_blocking + ur_w * ii + jj), Vmm(1 + r * jcp_.ur_w * jcp_.nb_oc_blocking + ur_w * ii + jj), Vmm(1 + r * jcp_.ur_w * jcp_.nb_oc_blocking + ur_w * ii + jj)); + } + } + } kh_loop(ur_w, pad_l, pad_r, oc_blocks, oc_step); @@ -550,8 +562,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ mov(reg_tmp_32, jcp_.ic); imul(reg_tmp_32, ptr[param1 + GET_OFF(kh_padding)]); - for (int jj = 0; jj < ur_w; jj++) + for (int jj = 0; jj < ur_w; jj++) { kw_padding[jj] = 0; + } for (int ki = 0; ki < jcp_.kw; ki++) { int jj_start = nstl::max(0, div_up(pad_l - ki * (jcp_.dilate_w + 1), jcp_.stride_w)); @@ -739,19 +752,21 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ if (isa == x64::avx512_core) { size_t o_off; - if (jcp_.with_dw_conv) + if (jcp_.with_dw_conv) { o_off = jj * jcp_.oc_block; - else + } else { o_off = jj * jcp_.oc * jcp_.ngroups; + } uni_vmovups(ptr[reg_output + o_off * jcp_.typesize_out], vmm_dst | ktail_mask); } else { for (int oc = 0; oc < tail_size; oc++) { size_t o_off; - if (jcp_.with_dw_conv) + if (jcp_.with_dw_conv) { o_off = jj * jcp_.oc_block + oc + r * (jcp_.oc_block / 2); - else + } else { o_off = jj * jcp_.oc * jcp_.ngroups + r * (jcp_.oc_block / 2) + oc; + } store_dst(ptr[reg_output + o_off * jcp_.typesize_out], vmm_dst, true); @@ -772,11 +787,12 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ Vmm vmm_dst = Vmm(1 + r * jcp_.ur_w * jcp_.nb_oc_blocking + ur_w * ii + jj); size_t o_off; - if (jcp_.with_dw_conv) - o_off = ((size_t)ii * jcp_dw_conv_.kh * jcp_.ow + jj) * jcp_.oc_block + + if (jcp_.with_dw_conv) { + o_off = (static_cast(ii) * jcp_dw_conv_.kh * jcp_.ow + jj) * 
jcp_.oc_block + r * (jcp_.oc_block / 2); - else + } else { o_off = ii * jcp_.oc_block + jj * jcp_.oc * jcp_.ngroups + r * (jcp_.oc_block / 2); + } store_dst(ptr[reg_output + o_off * jcp_.typesize_out], vmm_dst, false); } @@ -804,8 +820,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ int l_pad = jcp_.l_pad; int r_pad = nstl::max(0, (jcp_.ow - 1) * str_w + (kw - 1) * dilate_w - (iw + l_pad - 1)); int r_pad1 = (ur_w * n_oi - 1) * str_w + (kw - 1) * dilate_w - (iw + l_pad - 1); - if (r_pad1 > 0) + if (r_pad1 > 0) { n_oi--; + } mov(reg_input, reg_input_base); mov(reg_output, reg_output_base); @@ -817,10 +834,11 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ if (l_pad > 0) { n_oi--; - if (n_oi < 0 && r_pad1 > 0) + if (n_oi < 0 && r_pad1 > 0) { width_blk_step(ur_w, l_pad, r_pad1, oc_blocks, oc_step); // "lrpad" - else + } else { width_blk_step(ur_w, l_pad, 0, oc_blocks, oc_step); // "lpad" + } add(reg_input, jcp_.typesize_in * (ur_w * str_w - l_pad) * inp_mult); add(reg_output, jcp_.typesize_out * ur_w * out_mult); } @@ -846,8 +864,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ add(reg_output, jcp_.typesize_out * ur_w * out_mult); } - if (ur_w_tail != 0) + if (ur_w_tail != 0) { width_blk_step(ur_w_tail, 0, r_pad, oc_blocks, oc_step); // "tail" + } pop(reg_oc_off); pop(reg_oc_work); @@ -978,11 +997,13 @@ void BinaryConvolution::getSupportedDescriptors() { } } - if (getParentEdges().size() != expectedInputEdgesNum) + if (getParentEdges().size() != expectedInputEdgesNum) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); + } - if (getChildEdges().empty()) + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } if (getInputShapeAtPort(0).getRank() != 4) { THROW_CPU_NODE_ERR("doesn't support 0th input with rank: ", getInputShapeAtPort(0).getRank()); @@ -998,8 +1019,9 @@ void 
BinaryConvolution::getSupportedDescriptors() { } void BinaryConvolution::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } setPostOps(attr); @@ -1061,8 +1083,9 @@ void BinaryConvolution::initSupportedPrimitiveDescriptors() { void BinaryConvolution::createPrimitive() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { OPENVINO_THROW("CPU binary convolution with name '", getName(), "' doesn't have primitive descriptors."); + } auto srcDims = getParentEdgeAt(0)->getMemory().getStaticDims(); auto weiDims = getParentEdgeAt(1)->getMemory().getStaticDims(); @@ -1108,8 +1131,9 @@ void BinaryConvolution::createPrimitive() { int simd_w = implType == impl_desc_type::jit_avx512 ? 16 : 8; jcp.ur_w = implType == impl_desc_type::jit_avx512 ? 4 : 2; - if (jcp.ow < jcp.ur_w) + if (jcp.ow < jcp.ur_w) { jcp.ur_w = jcp.ow; + } jcp.ur_w_tail = jcp.ow % jcp.ur_w; jcp.ic_block = 32; @@ -1138,8 +1162,9 @@ void BinaryConvolution::createPrimitive() { bool args_ok = (jcp.l_pad <= jcp.ur_w) && (r_pad_no_tail <= jcp.ur_w) && IMPLICATION(jcp.kw > 7, (jcp.t_pad == 0 && jcp.l_pad == 0) || (jcp.stride_w == 1 && jcp.stride_h == 1)); - if (!args_ok) + if (!args_ok) { OPENVINO_THROW("BinaryConvolution with name '", getName(), "' has unsupported parameters"); + } #if defined(OPENVINO_ARCH_X86_64) jit_dw_conv_params jcp_dw_conv = {}; if (implType == impl_desc_type::jit_avx512) { @@ -1149,18 +1174,21 @@ void BinaryConvolution::createPrimitive() { } else if (implType == impl_desc_type::sse42) { bin_conv_kernel.reset(new jit_uni_bin_conv_kernel_f32(jcp, jcp_dw_conv, *attr.get())); } - if (bin_conv_kernel) + if (bin_conv_kernel) { bin_conv_kernel->create_ker(); + } #endif } bool BinaryConvolution::canFuse(const NodePtr& node) const { - if (implType == impl_desc_type::ref) + if (implType == impl_desc_type::ref) { return 
false; + } // Binarization have to be last operation in fusing chain - if (isFusedWith(Type::FakeQuantize)) + if (isFusedWith(Type::FakeQuantize)) { return false; + } if (node->getType() == Type::FakeQuantize) { bool ret = node->getAlgorithm() == Algorithm::FQBinarization; @@ -1298,7 +1326,7 @@ void BinaryConvolution::executeReference(const uint8_t* src, const int nbits = 8; auto extract_bit = [](uint8_t val, uint8_t bit) -> uint8_t { - return (uint8_t)((val >> bit) & 0x0001); + return static_cast((val >> bit) & 0x0001); }; auto ker = [=](int32_t& d, int g, int mb, int oc, int oh, int ow) { @@ -1317,17 +1345,18 @@ void BinaryConvolution::executeReference(const uint8_t* src, uint8_t s; if (ih < 0 || ih >= IH || iw < 0 || iw >= IW) { - if (pad_value == 0) + if (pad_value == 0) { continue; - else - s = pad_value == 1.0f ? (uint8_t)1 : (uint8_t)0; + } else { + s = pad_value == 1.0f ? static_cast(1) : static_cast(0); + } } else { - s = extract_bit(src[iidx / nbits], (uint8_t)(iidx % nbits)); + s = extract_bit(src[iidx / nbits], static_cast(iidx % nbits)); } - uint8_t w = extract_bit(weights[widx / nbits], (uint8_t)(widx % nbits)); + uint8_t w = extract_bit(weights[widx / nbits], static_cast(widx % nbits)); - d += (int32_t)(s ^ w); + d += static_cast(s ^ w); } } } @@ -1386,8 +1415,9 @@ void BinaryConvolution::execute(const dnnl::stream& strm) { } auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { OPENVINO_THROW("CPU binary convolution with name '", getName(), "' doesn't have primitive descriptors."); + } auto implType = selectedPrimitiveDescriptor->getImplementationType(); if (implType != impl_desc_type::ref) { diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.cpp b/src/plugins/intel_cpu/src/nodes/broadcast.cpp index 5d6dc9ebea5bbc..b7e1b19f09f3c7 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.cpp +++ b/src/plugins/intel_cpu/src/nodes/broadcast.cpp @@ -57,17 +57,20 @@ 
Broadcast::Broadcast(const std::shared_ptr& op, const GraphContext::CP OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (op->get_input_size() != 2 && op->get_input_size() != 3) + if (op->get_input_size() != 2 && op->get_input_size() != 3) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if (op->get_output_size() == 0) + } + if (op->get_output_size() == 0) { THROW_CPU_NODE_ERR("has no output edges."); + } auto broadcastOp = ov::as_type_ptr(op); if (broadcastOp->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::NUMPY) { broadcastType = NUMPY; } else if (broadcastOp->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::EXPLICIT) { - if (op->get_input_size() <= AXES_MAPPING_IDX) + if (op->get_input_size() <= AXES_MAPPING_IDX) { THROW_CPU_NODE_ERR("and EXPLICIT mode must have tree input edges: ", getParentEdges().size()); + } broadcastType = EXPLICIT; } else { THROW_CPU_NODE_ERR("has unexpected broadcast type: ", broadcastOp->get_broadcast_spec().m_type); @@ -105,8 +108,9 @@ void Broadcast::getSupportedDescriptors() { } void Broadcast::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } supportedPrimitiveDescriptors = getSupportedConfigs(this, outputShapes.size()); } @@ -211,10 +215,12 @@ void Broadcast::plainExecute(const dnnl::stream& strm) { VectorDims srcStrides = srcDesc->getStrides(); const size_t dataSize = srcDesc->getPrecision().size(); - if (!dataSrcRank) + if (!dataSrcRank) { srcDims = VectorDims(1, 1); - if (!srcStrides.size()) + } + if (!srcStrides.size()) { srcStrides = VectorDims(1, 1); + } auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); VectorDims dstStrides = dstDesc->getStrides(); @@ -244,15 +250,17 @@ void Broadcast::plainExecute(const dnnl::stream& strm) { i /= dstDims[j]; } for (size_t iwork = start * dataSize; iwork < end * dataSize; iwork += dataSize) { - for (i = 0lu, srcIdx = 0lu; 
i < dataDstRank; ++i) + for (i = 0lu, srcIdx = 0lu; i < dataDstRank; ++i) { srcIdx += counters[i] ? ((counters[i] % srcAligned[i]) * srcStridesAligned[i]) : 0; + } cpu_memcpy(&dstData[iwork], &srcData[srcIdx * dataSize], dataSize); for (int j = dataDstRank - 1; j >= 0; j--) { counters[j] = (counters[j] + 1) % dstDims[j]; - if (counters[j] != 0) + if (counters[j] != 0) { break; + } } } }); diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index 67f1c3ff482405..97d4e0daf2ea9b 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -37,10 +37,11 @@ Bucketize::Bucketize(const std::shared_ptr& op, const GraphContext::CP } const auto bucketsize = ov::as_type_ptr(op); - if (bucketsize == nullptr) + if (bucketsize == nullptr) { OPENVINO_THROW("Operation with name '", op->get_friendly_name(), "' is not an instance of Bucketize from opset3."); + } if (getOriginalInputsNumber() != 2 || getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); @@ -51,8 +52,9 @@ Bucketize::Bucketize(const std::shared_ptr& op, const GraphContext::CP } void Bucketize::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } // check precisions for input and output tensors input_precision = getOriginalInputPrecisionAtPort(INPUT_TENSOR_PORT); @@ -188,14 +190,18 @@ void Bucketize::prepareParams() { auto inputTensorMemPtr = getSrcMemoryAtPort(INPUT_TENSOR_PORT); auto inputBinsMemPtr = getSrcMemoryAtPort(INPUT_BINS_PORT); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory is undefined."); - if (!inputTensorMemPtr || !inputTensorMemPtr->isDefined()) + } + if (!inputTensorMemPtr || !inputTensorMemPtr->isDefined()) { OPENVINO_THROW("Input tensor is 
undefined."); - if (!inputBinsMemPtr || !inputBinsMemPtr->isDefined()) + } + if (!inputBinsMemPtr || !inputBinsMemPtr->isDefined()) { OPENVINO_THROW("Input bins is undefined."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } // update with_bins/num_values/num_bin_values auto input_tensor_dims = inputTensorMemPtr->getStaticDims(); @@ -211,8 +217,10 @@ void Bucketize::prepareParams() { } num_bin_values = input_bin_dims[0]; - num_values = - std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies()); + num_values = std::accumulate(input_tensor_dims.begin(), + input_tensor_dims.end(), + static_cast(1), + std::multiplies()); } bool Bucketize::isExecutable() const { diff --git a/src/plugins/intel_cpu/src/nodes/causal_mask_preprocess.cpp b/src/plugins/intel_cpu/src/nodes/causal_mask_preprocess.cpp index 93f5278b06a4a8..c21ce4d6e38818 100644 --- a/src/plugins/intel_cpu/src/nodes/causal_mask_preprocess.cpp +++ b/src/plugins/intel_cpu/src/nodes/causal_mask_preprocess.cpp @@ -88,11 +88,11 @@ struct CausalMaskPreprocess::ExecutorCausalMaskPreprocess : public CausalMaskPre bool cmask_eq0 = (j <= row); bool amask_eq0 = (pamask[j] == 0); bool padding_mask = (cmask_eq0 && amask_eq0); - pdst[j] = (padding_mask | (!cmask_eq0)) ? min_dtype : T(0); + pdst[j] = (padding_mask | (!cmask_eq0)) ? min_dtype : static_cast(0); } for (; j < kvLen; j++) { bool cmask_eq0 = (j <= row); - pdst[j] = cmask_eq0 ? T(0) : min_dtype; + pdst[j] = cmask_eq0 ? 
static_cast(0) : min_dtype; } }); DEBUG_LOG("CausalMaskPreprocess::execute dst=", t_dst); @@ -125,8 +125,9 @@ bool CausalMaskPreprocess::isSupportedOperation(const std::shared_ptr iprecs = getOriginalInputPrecisions(); std::vector oprecs = getOriginalOutputPrecisions(); @@ -141,19 +142,22 @@ void CausalMaskPreprocess::initSupportedPrimitiveDescriptors() { oprecs[0] = ov::element::f32; } // all input precisions must be int32 - for (auto& prec : iprecs) + for (auto& prec : iprecs) { prec = ov::element::i32; + } } else { OPENVINO_THROW("CPU: CausalMaskPreprocess type not supported : " + m_config.type); } std::vector inPortConfigs; - for (size_t i = 0; i < getOriginalInputsNumber(); i++) + for (size_t i = 0; i < getOriginalInputsNumber(); i++) { inPortConfigs.emplace_back(LayoutType::ncsp, iprecs[i], getInputShapeAtPort(i), false, -1); + } std::vector outPortConfigs; - for (size_t i = 0; i < getOriginalOutputsNumber(); i++) + for (size_t i = 0; i < getOriginalOutputsNumber(); i++) { outPortConfigs.emplace_back(LayoutType::ncsp, oprecs[i], getOutputShapeAtPort(i), false, -1); + } addSupportedPrimDesc(inPortConfigs, outPortConfigs, impl_desc_type::ref_any); } diff --git a/src/plugins/intel_cpu/src/nodes/col2im.cpp b/src/plugins/intel_cpu/src/nodes/col2im.cpp index 58c1e36a9e308a..f3f432ad1608f6 100644 --- a/src/plugins/intel_cpu/src/nodes/col2im.cpp +++ b/src/plugins/intel_cpu/src/nodes/col2im.cpp @@ -40,8 +40,9 @@ void Col2Im::getSupportedDescriptors() { } void Col2Im::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type dataPrecision = getOriginalInputPrecisionAtPort(0); addSupportedPrimDesc( {{LayoutType::ncsp, dataPrecision}, {LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}}, diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.cpp b/src/plugins/intel_cpu/src/nodes/color_convert.cpp index dd6d5ede57b2bd..41cda88022699a 100644 
--- a/src/plugins/intel_cpu/src/nodes/color_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/color_convert.cpp @@ -27,14 +27,18 @@ namespace node { namespace { std::tuple getAlgorithmFor(const std::shared_ptr& op) { - if (ov::is_type(op)) + if (ov::is_type(op)) { return std::make_tuple(Algorithm::ColorConvertNV12toRGB, std::string()); - if (ov::is_type(op)) + } + if (ov::is_type(op)) { return std::make_tuple(Algorithm::ColorConvertNV12toBGR, std::string()); - if (ov::is_type(op)) + } + if (ov::is_type(op)) { return std::make_tuple(Algorithm::ColorConvertI420toRGB, std::string()); - if (ov::is_type(op)) + } + if (ov::is_type(op)) { return std::make_tuple(Algorithm::ColorConvertI420toBGR, std::string()); + } return std::make_tuple(Algorithm::Default, std::string("Type ") + op->get_type_name() + " is not supported."); } @@ -123,8 +127,9 @@ struct jit_uni_converter : public jit_kernel { jit_uni_converter::jit_uni_converter() : jit_kernel(jit_name()), _consts(*this) {} void jit_uni_converter::init() { - if (create_kernel() != status::success) + if (create_kernel() != status::success) { OPENVINO_THROW("Can't generate jit color converter kernel"); + } _fn = (function_t)jit_ker(); } @@ -135,8 +140,9 @@ void jit_uni_converter::yuv_to_rgb(const variable& y, const variable& color_format, bool round) { auto clip = [&](const variable& op, const variable& a, const variable& b) { - if (round) + if (round) { uni_vroundps(op, op, 0); + } uni_vmaxps(op, op, a); uni_vminps(op, op, b); }; @@ -177,8 +183,9 @@ void jit_uni_converter::yuv_to_rgb(const variable& y, auto genPermutationMask = [&](int offset) { std::array mask{}; - for (uint8_t i = 0; i < mask.size(); ++i) + for (uint8_t i = 0; i < mask.size(); ++i) { mask[(i * 3 + offset) % mask.size()] = i; + } return mask; }; @@ -266,7 +273,7 @@ void jit_uni_converter::store_tail(const variable& dst, sptr += step; store(sptr, c); - auto copy_size = size * size_t(3u); + auto copy_size = size * static_cast(3u); copy(ptr[dst], s.pointer(), 
copy_size); } @@ -312,10 +319,12 @@ class RefConverter : public Converter { }; RefConverter::RefConverter(Node* node) : Converter(node) { - if (node->getOriginalInputsNumber() != (singlePlane() ? 1 : 2)) + if (node->getOriginalInputsNumber() != (singlePlane() ? 1 : 2)) { OPENVINO_THROW("NV12Converter node has incorrect number of inputs"); - if (!node->getOriginalOutputsNumber()) + } + if (!node->getOriginalOutputsNumber()) { OPENVINO_THROW("NV12Converter node has incorrect number of outputs"); + } } template @@ -639,10 +648,12 @@ class RefConverter : public Converter { }; RefConverter::RefConverter(Node* node) : Converter(node) { - if (node->getOriginalInputsNumber() != (singlePlane() ? 1 : 3)) + if (node->getOriginalInputsNumber() != (singlePlane() ? 1 : 3)) { OPENVINO_THROW("I420Converter node has incorrect number of inputs"); - if (!node->getOriginalOutputsNumber()) + } + if (!node->getOriginalOutputsNumber()) { OPENVINO_THROW("I420Converter node has incorrect number of outputs"); + } } template @@ -968,15 +979,17 @@ ColorConvert::ColorConvert(const std::shared_ptr& op, const GraphConte : Node(op, context, ColorConvertShapeInferFactory(op)) { std::string errorMessage; std::tie(algorithm, errorMessage) = getAlgorithmFor(op); - if (algorithm == Algorithm::Default) + if (algorithm == Algorithm::Default) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + } } void ColorConvert::getSupportedDescriptors() {} void ColorConvert::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } switch (algorithm) { case Algorithm::ColorConvertNV12toRGB: @@ -1064,9 +1077,10 @@ void ColorConvert::initSupportedI420Impls() { void ColorConvert::createPrimitive() { const NodeDesc* desc = getSelectedPrimitiveDescriptor(); - if (!desc) + if (!desc) { OPENVINO_THROW(getTypeStr() + " node with name '" + getName() + "' ", "no optimal primitive descriptor selected"); + } if (!_impl) { const auto& cfg = 
desc->getConfig(); @@ -1079,8 +1093,9 @@ void ColorConvert::createPrimitive() { } void ColorConvert::execute(const dnnl::stream& strm) { - if (!_impl) + if (!_impl) { OPENVINO_THROW(getTypeStr() + " node with name '" + getName() + "' ", "has no any implemented converter"); + } _impl->execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp index 5eb90a065c67be..bd93476320540c 100644 --- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp @@ -111,12 +111,15 @@ class jit_convert_array : public jit_kernel { postamble(); - if (f8_e4m3_emu_) + if (f8_e4m3_emu_) { f8_e4m3_emu_->prepare_table(); - if (f8_e5m2_emu_) + } + if (f8_e5m2_emu_) { f8_e5m2_emu_->prepare_table(); - if (uni_vcvtneps2bf16_) + } + if (uni_vcvtneps2bf16_) { uni_vcvtneps2bf16_->emit_data(); + } } public: @@ -607,8 +610,9 @@ struct ConvertPrecision> { batch_type tmp; const size_t offset = i * batch; const size_t current_batch_size = std::min(ctx.size - offset, batch); - for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32 + for (size_t j = 0; j < current_batch_size; ++j) { // src_t -> fp32 tmp[j] = static_cast(std::max(std::min(src[offset + j], ubound), lbound)); + } jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16 }); } else if (ctx.interimPrc.is_real()) { @@ -619,8 +623,9 @@ struct ConvertPrecision> { jit_convert(reinterpret_cast(src) + offset, dst + offset, current_batch_size); } else { batch_type tmp; - for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32 + for (size_t j = 0; j < current_batch_size; ++j) { // src_t -> fp32 tmp[j] = static_cast(src[offset + j]); + } jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16 } }); @@ -629,8 +634,9 @@ struct ConvertPrecision> { batch_type tmp; const size_t offset = i * batch; const size_t current_batch_size = std::min(ctx.size - offset, batch); - for 
(size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32 + for (size_t j = 0; j < current_batch_size; ++j) { // src_t -> fp32 tmp[j] = static_cast(std::trunc(std::max(std::min(src[offset + j], ubound), lbound))); + } jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16 }); } @@ -658,8 +664,9 @@ struct ConvertPrecision> { const size_t offset = i * batch; const size_t current_batch_size = std::min(ctx.size - offset, batch); jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32 - for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t + for (size_t j = 0; j < current_batch_size; ++j) { // fp32 -> dst_t dst[offset + j] = static_cast(std::max(std::min(tmp[j], ubound), lbound)); + } }); } else if (ctx.interimPrc.is_real()) { parallel_for(iterations, [&](size_t i) { @@ -670,8 +677,9 @@ struct ConvertPrecision> { } else { batch_type tmp; jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32 - for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t + for (size_t j = 0; j < current_batch_size; ++j) { // fp32 -> dst_t dst[offset + j] = static_cast(tmp[j]); + } } }); } else { @@ -680,8 +688,9 @@ struct ConvertPrecision> { const size_t offset = i * batch; const size_t current_batch_size = std::min(ctx.size - offset, batch); jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32 - for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t + for (size_t j = 0; j < current_batch_size; ++j) { // fp32 -> dst_t dst[offset + j] = static_cast(std::trunc(std::max(std::min(tmp[j], ubound), lbound))); + } }); } @@ -710,8 +719,9 @@ struct ConvertPrecision> { const size_t offset = i * batch; const size_t current_batch_size = std::min(ctx.size - offset, batch); jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32 - for (size_t j = 0; j < current_batch_size; ++j) // truncate fp32 + for (size_t j = 0; j < current_batch_size; ++j) { // truncate fp32 tmp[j] = std::trunc(std::max(std::min(tmp[j], 
ubound), lbound)); + } jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16 }); } @@ -957,8 +967,9 @@ void cpu_convert(const void* srcPtr, if (size == 0) { return; } - if (srcPtr == nullptr || dstPtr == nullptr) + if (srcPtr == nullptr || dstPtr == nullptr) { OPENVINO_THROW("cpu_convert has null data pointer"); + } if (srcPrc == dstPrc && srcPrc == interimPrc) { const size_t L2_cache_size = dnnl::utils::get_cache_size(2, true); @@ -979,7 +990,7 @@ void cpu_convert(const void* srcPtr, cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size()); } } else if (srcPrc == ov::element::u1) { - if (srcPrc.bitwidth() != 1) + if (srcPrc.bitwidth() != 1) { OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc, ". Not implemented."); + } ConvertFromBinContext ctx{srcPtr, dstPtr, size, false}; OV_SWITCH(intel_cpu, ConvertFromBinPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FROM_BIN_LIST); - if (!ctx.converted) + if (!ctx.converted) { OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc); + } } else if (srcPrc.bitwidth() == 4u) { ConvertFrom4BitContext ctx{srcPrc, srcPtr, dstPtr, size, false}; OV_SWITCH(intel_cpu, ConvertFrom4BitPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FROM_4BIT_LIST); - if (!ctx.converted) + if (!ctx.converted) { OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc); + } } else if (srcPrc == ov::element::f8e8m0) { ConvertFromByteFPContext ctx{srcPrc, srcPtr, dstPtr, size, false}; OV_SWITCH(intel_cpu, @@ -1008,22 +1022,25 @@ void cpu_convert(const void* srcPtr, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FROM_BYTE_FP_LIST); - if (!ctx.converted) + if (!ctx.converted) { OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc); + } #if defined(OPENVINO_ARCH_X86_64) } else if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16) && (one_of(srcPrc, ov::element::f8e4m3, ov::element::f8e5m2) || 
one_of(dstPrc, ov::element::f8e4m3, ov::element::f8e5m2))) { ConvertFP8Context ctx{srcPtr, dstPtr, size, false}; OV_SWITCH(intel_cpu, ConvertFP8Precision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FP8_LIST); - if (!ctx.converted) + if (!ctx.converted) { OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc); + } #endif } else { ConvertContext ctx{srcPtr, dstPtr, size, interimPrc, dstPrc, false}; OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_LIST); - if (!ctx.converted) + if (!ctx.converted) { OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc); + } } } diff --git a/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp b/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp index 276d7b6b2920d9..aa853ab6d9101e 100644 --- a/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp @@ -181,8 +181,9 @@ PermuteKernel::PermuteKernel(const PermuteParams& params) : params(params) { } #endif // OPENVINO_ARCH_X86_64 - if (permute_kernel) + if (permute_kernel) { permute_kernel->create_ker(); + } } void PermuteKernel::execute(const uint8_t* src_data, uint8_t* dst_data, const int mb) { @@ -209,8 +210,9 @@ void PermuteKernel::optimizedExecute(const uint8_t* src_data, uint8_t* dst_data, const VectorDims dst_strides = jcp.dst_strides; const VectorDims src_strides = jcp.src_strides; - if (static_cast(dst_dims[0]) != mb) + if (static_cast(dst_dims[0]) != mb) { dst_dims[0] = mb; + } switch (jcp.n) { case 1: diff --git a/src/plugins/intel_cpu/src/nodes/common/softmax.cpp b/src/plugins/intel_cpu/src/nodes/common/softmax.cpp index 2c9697abb6f718..05087cdd31c831 100644 --- a/src/plugins/intel_cpu/src/nodes/common/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/softmax.cpp @@ -71,8 +71,9 @@ struct jit_uni_softmax_kernel_f32 : public jit_uni_softmax_kernel, public jit_ge exp_injector.reset( new 
jit_uni_eltwise_injector(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.0f, data_type::f32)); - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa)); + } this->preamble(); @@ -172,8 +173,9 @@ struct jit_uni_softmax_kernel_f32 : public jit_uni_softmax_kernel, public jit_ge this->postamble(); - if (uni_vcvtneps2bf16) + if (uni_vcvtneps2bf16) { uni_vcvtneps2bf16->emit_data(); + } exp_injector->prepare_table(); } @@ -261,8 +263,9 @@ SoftmaxGeneric::SoftmaxGeneric(ov::element::Type inpPrc, ov::element::Type outPr softmax_kernel.reset(new jit_uni_softmax_kernel_f32(jcp)); block_size = 4; } - if (softmax_kernel) + if (softmax_kernel) { softmax_kernel->create_ker(); + } #endif } @@ -279,8 +282,8 @@ void SoftmaxGeneric::calculate(const in_data_t* src_data, out_data_t* dst_data, arg.src = src_data + b * C * H * W + ib * block_size; arg.dst = dst_data + b * C * H * W + ib * block_size; - arg.src_stride = static_cast((size_t)(H)*W * sizeof(in_data_t)); - arg.dst_stride = static_cast((size_t)(H)*W * sizeof(out_data_t)); + arg.src_stride = static_cast(static_cast(H) * W * sizeof(in_data_t)); + arg.dst_stride = static_cast(static_cast(H) * W * sizeof(out_data_t)); arg.work_amount = static_cast(C); (*softmax_kernel)(&arg); @@ -294,8 +297,9 @@ void SoftmaxGeneric::calculate(const in_data_t* src_data, out_data_t* dst_data, float max = src_data[b * C * H * W + offset]; for (int c = 0; c < C; c++) { float val = src_data[b * C * H * W + c * H * W + offset]; - if (val > max) + if (val > max) { max = val; + } } float expSum = 0; diff --git a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp index 203703449a2d6d..b826686c763dbe 100644 --- a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp @@ -65,8 +65,10 @@ bool 
TileBroadcastCommon::canBeExecutedInBlockedLayout(VectorDims srcBlockedDims VectorDims blockedRepeats, const size_t elemsInBlock) { if (srcBlockedDims.empty() || blockedRepeats.empty() || elemsInBlock == 0lu || - srcBlockedDims[1] == Shape::UNDEFINED_DIM || (blockedRepeats[1] != 1 && srcBlockedDims[1] % elemsInBlock != 0)) + srcBlockedDims[1] == Shape::UNDEFINED_DIM || + (blockedRepeats[1] != 1 && srcBlockedDims[1] % elemsInBlock != 0)) { return false; + } srcBlockedDims[1] = div_up(srcBlockedDims[1], elemsInBlock); srcBlockedDims.push_back(elemsInBlock); @@ -102,7 +104,7 @@ std::vector TileBroadcastCommon::getSupportedConfigs(const Node* node, size_t outDataShapeRank = node->getOutputShapeAtPort(0).getRank(); NodeConfig config; - if (repeats.size() != outDataShapeRank && !repeats.empty()) + if (repeats.size() != outDataShapeRank && !repeats.empty()) { OPENVINO_THROW(node->getTypeStr(), " node with name ", node->getName(), @@ -111,6 +113,7 @@ std::vector TileBroadcastCommon::getSupportedConfigs(const Node* node, repeats.size(), ", output shape rank: ", outDataShapeRank); + } config.inConfs.resize(node->getParentEdges().size()); config.inConfs[0].inPlace(-1); @@ -206,8 +209,9 @@ bool TileBroadcastCommon::prepareOptimizedParams(const Node* node, fillOptimizedDimsAndSrcStrides(srcBlockedDims, blockedRepeats, optimizedDims, optimizedSrcStrides); constexpr size_t maxNDims = 6lu; - if (optimizedDims.size() > maxNDims) + if (optimizedDims.size() > maxNDims) { return false; + } while (optimizedDims.size() < maxNDims) { optimizedDims.insert(optimizedDims.begin(), 1); diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index c82a187fdfdbbd..614b19590e05aa 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -74,8 +74,9 @@ void Concat::getSupportedDescriptors() { const auto& dims = getInputShapeAtPort(i).getDims(); bool incorrectDims = false; for (size_t j = 0; j < 
firstParentDims.size(); j++) { - if (j == axis) + if (j == axis) { continue; + } if (dims.size() != firstParentDims.size() || !dimsEqualWeak(firstParentDims[j], dims[j])) { incorrectDims = true; break; @@ -93,13 +94,15 @@ void Concat::getSupportedDescriptors() { if (childDims[axis] != Shape::UNDEFINED_DIM && std::all_of(childDims.begin(), childDims.begin() + axis, [](size_t dim) { return dim == 1; - })) + })) { canBeInPlace = true; + } } void Concat::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto& originInputPrecisions = getOriginalInputPrecisions(); inputPrecision = originInputPrecisions[0]; @@ -112,8 +115,9 @@ void Concat::initSupportedPrimitiveDescriptors() { } // Concat doesn't support different precision on inputs so fallback on FP32 in such case - if (isMixedPrecision) + if (isMixedPrecision) { inputPrecision = ov::element::f32; + } // Concat supports only equal precisions for inputs and output outputPrecision = inputPrecision; @@ -126,8 +130,9 @@ void Concat::initSupportedPrimitiveDescriptors() { if (dstShape.getRank() > channelAxis) { for (auto& item : {std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { const VectorDims& blkDims = dstShape.getDims(); - if (blkDims[channelAxis] == Shape::UNDEFINED_DIM || blkDims[channelAxis] % item.first != 0) + if (blkDims[channelAxis] == Shape::UNDEFINED_DIM || blkDims[channelAxis] % item.first != 0) { continue; + } bool blocked = true; for (size_t i = 0; i < getParentEdges().size(); i++) { @@ -189,8 +194,9 @@ void Concat::initSupportedPrimitiveDescriptors() { if (!canBeInPlace || std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape) { return shape.hasZeroDims(); - })) + })) { return; + } // Optimized inplace case for (auto refPdIndex : pdIndexesToReuse) { @@ -211,8 +217,9 @@ void Concat::selectOptimalPrimitiveDescriptor() { // for that case. 
for (size_t i = 0; i < getParentEdges().size(); i++) { for (size_t j = i + 1; j < getParentEdges().size(); j++) { - if (getParentEdgeAt(i) == getParentEdgeAt(j)) + if (getParentEdgeAt(i) == getParentEdgeAt(j)) { canBeInPlace = false; + } } } @@ -226,13 +233,15 @@ void Concat::selectOptimalPrimitiveDescriptor() { auto parent = parentEdge->getParent(); auto parent_pdesc = parent->getSelectedPrimitiveDescriptor(); - if (parent_pdesc == nullptr) + if (parent_pdesc == nullptr) { continue; + } const auto& parent_config = parent_pdesc->getConfig(); int outputIndex = parentEdge->getInputNum(); - if (outputIndex < 0 || outputIndex >= static_cast(parent_config.outConfs.size())) + if (outputIndex < 0 || outputIndex >= static_cast(parent_config.outConfs.size())) { OPENVINO_THROW("Cannot find index of output node"); + } const auto& port_desc = parent_config.outConfs[outputIndex].getMemDesc(); for (auto& item : supportedLayouts) { if (port_desc->hasLayoutType(item)) { @@ -244,13 +253,15 @@ void Concat::selectOptimalPrimitiveDescriptor() { auto childEdge = getChildEdgeAt(i); auto child = childEdge->getChild(); const auto* prim_desc = child->getSelectedPrimitiveDescriptor(); - if (prim_desc == nullptr) + if (prim_desc == nullptr) { continue; + } const auto& config = prim_desc->getConfig(); int inputIndex = childEdge->getOutputNum(); - if (inputIndex < 0 || inputIndex >= static_cast(config.inConfs.size())) + if (inputIndex < 0 || inputIndex >= static_cast(config.inConfs.size())) { OPENVINO_THROW("Cannot find index of output node"); + } const auto& port_desc = config.inConfs[inputIndex].getMemDesc(); for (auto& item : supportedLayouts) { if (port_desc->hasLayoutType(item)) { @@ -336,15 +347,18 @@ bool Concat::needPrepareParams() const { } void Concat::prepareParams() { - if (canOptimizeNspc || isInPlace()) + if (canOptimizeNspc || isInPlace()) { return; + } const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || 
!dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory is undefined."); + } auto dstMemDesc = dstMemPtr->getDescWithType(); - if (getSelectedPrimitiveDescriptor() == nullptr) + if (getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } const auto& outputStrides = dstMemDesc->getStrides(); size_t curConcatOffset = 0; @@ -379,8 +393,9 @@ void Concat::prepareParams() { break; } } - if (canOptimize1DCase) + if (canOptimize1DCase) { return; + } } std::vector srcs_d; @@ -453,8 +468,9 @@ size_t Concat::inverseOrder(const VectorDims& order, size_t axis) { void Concat::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } if (!isInPlace()) { Node::initOptimalPrimitiveDescriptor(); @@ -637,8 +653,9 @@ void Concat::execRef() { numSrc, [&](size_t n0, size_t n1, size_t n2, size_t n3, size_t n4, size_t a) { // check if zero memory - if (srcPtrs[a] == nullptr) + if (srcPtrs[a] == nullptr) { return; + } size_t inOff = inputStrides[a][0] * n0 + inputStrides[a][1] * n1 + inputStrides[a][2] * n2 + inputStrides[a][3] * n3 + inputStrides[a][4] * n4; @@ -697,8 +714,9 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { } auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } auto& config = selected_pd->getConfig(); size_t numberOfInputs = config.inConfs.size(); size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 3240599d00c819..0f9fd20d24fa75 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -146,8 +146,9 @@ class Convolution::FusedSubgraph { return false; }); - 
if (itr == opList.end()) + if (itr == opList.end()) { return; + } auto sumNode = *itr; addEdge(inp0, sumNode, 0, 0); @@ -320,19 +321,22 @@ bool Convolution::canBeExecutedInInt8() const { auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(0)); auto weightsDataType = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(1)); - if (!legacyInputZeroPoints.empty()) + if (!legacyInputZeroPoints.empty()) { inputDataType = memory::data_type::u8; + } - if (!legacyWeightsZeroPoints.empty()) + if (!legacyWeightsZeroPoints.empty()) { weightsDataType = memory::data_type::s8; + } return one_of(inputDataType, memory::data_type::u8, memory::data_type::s8) && weightsDataType == memory::data_type::s8; } ov::element::Type Convolution::fusedEltwisePrecision(const NodePtr& fusingNode) const { - if (sumPrc != ov::element::undefined) + if (sumPrc != ov::element::undefined) { return sumPrc; + } ov::element::Type eltwisePrecision; @@ -391,8 +395,9 @@ const std::vector& Convolution::getDefaultImplPriority() { impl_desc_type::ref_any, impl_desc_type::ref, }; - if (isBrgConvAvailable()) + if (isBrgConvAvailable()) { return priorities; + } static const std::vector priorities_wo_brgemm = [&] { std::vector result; @@ -411,10 +416,12 @@ const bool Convolution::isBrgConvAvailable() { } void Convolution::getSupportedDescriptors() { - if (!descs.empty()) + if (!descs.empty()) { return; - if (!attrs.empty()) + } + if (!attrs.empty()) { OPENVINO_THROW("attrs vector is not empty '", getName(), "'"); + } attrs.reserve(2); withBiases = getOriginalInputsNumber() == 3; @@ -434,8 +441,9 @@ void Convolution::getSupportedDescriptors() { } auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(0)); - if (!legacyInputZeroPoints.empty()) + if (!legacyInputZeroPoints.empty()) { inputDataType = memory::data_type::u8; + } outputDataType = 
DnnlExtensionUtils::ElementTypeToDataType(getOriginalOutputPrecisionAtPort(0)); eltwisePrecision = DnnlExtensionUtils::DataTypeToElementType(outputDataType); @@ -465,15 +473,17 @@ void Convolution::getSupportedDescriptors() { } } - if (static_cast(getParentEdges().size()) != expectedInputEdgesNum) + if (static_cast(getParentEdges().size()) != expectedInputEdgesNum) { OPENVINO_THROW("Incorrect number of input edges for layer ", getName(), ", expected: ", expectedInputEdgesNum, " actual: ", getParentEdges().size()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { OPENVINO_THROW("Incorrect number of output edges for layer ", getName()); + } int ndims = getInputShapeAtPort(0).getRank(); @@ -558,8 +568,9 @@ void Convolution::getSupportedDescriptors() { } // fallback to f32 on special case for performance reasons - if (isDepthWise() && ndims == 5) + if (isDepthWise() && ndims == 5) { dt = memory::data_type::f32; + } return dt; }; @@ -593,8 +604,9 @@ void Convolution::getSupportedDescriptors() { } SetPostOpsAndZeroPoints(attrs); - if (!one_of(ndims, 3, 4, 5)) + if (!one_of(ndims, 3, 4, 5)) { return; + } auto inputShape = getInputShapeAtPort(0); auto outputShape = getOutputShapeAtPort(0); @@ -670,8 +682,9 @@ void Convolution::setPostOps(dnnl::primitive_attr& attr, auto& node = fusedWith[i]; bool isLastPostOp = (i == (fusedWith.size() - 1)); - if (node->getType() == Type::Split || node->getType() == Type::Concatenation) + if (node->getType() == Type::Split || node->getType() == Type::Concatenation) { continue; + } if (auto* eltwiseNode = dynamic_cast(node.get())) { if (eltwiseNode->isSpecialConvolutionAddFusing()) { @@ -782,12 +795,14 @@ void Convolution::selectOptimalPrimitiveDescriptor() { } void Convolution::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto getBlockedMask = [](const std::shared_ptr& memDesc, const bool isGrouped) { - if 
(memDesc->getType() & MemoryDescType::Blocked && !isGrouped) + if (memDesc->getType() & MemoryDescType::Blocked && !isGrouped) { return BlockedMemoryDesc::EMPTY_MASK; + } return BlockedMemoryDesc::FULL_MASK; }; @@ -885,8 +900,9 @@ void Convolution::initSupportedPrimitiveDescriptors() { // fallback. if none of the primitive types is present in the priority list just add first implementation // @todo this fallback is not necessary if primitive priority list is filled correctly - if (supportedPrimitiveDescriptors.empty()) + if (supportedPrimitiveDescriptors.empty()) { add_supported_desc(first_desc); + } } } @@ -1036,8 +1052,9 @@ void Convolution::createDescriptor(const std::vector& inputDesc, } void Convolution::addZeroPoints(dnnl::primitive_attr& attr) { - if (inputZeroPoints.empty()) + if (inputZeroPoints.empty()) { return; + } DEBUG_LOG(getName(), ": Set original input zeropoints"); attr.set_zero_points_mask(DNNL_ARG_SRC, 0); @@ -1259,8 +1276,9 @@ bool Convolution::isNspcAvailable() const { } } // AVX2 heuristic - if (useJitPlanar) + if (useJitPlanar) { return false; + } // A bunch of heuristics are designed to cut off not optimal nspc convolution applications auto inpDims = getInputShapeAtPort(0).getDims(); auto outDims = getOutputShapeAtPort(0).getDims(); @@ -1328,22 +1346,27 @@ void Convolution::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto wghMemPtr = getSrcMemoryAtPort(1); auto dstMemPtr = getOutputMemory(); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory was undefined."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { OPENVINO_THROW("Input memory was undefined."); - if (!wghMemPtr || !wghMemPtr->isDefined()) + } + if (!wghMemPtr || !wghMemPtr->isDefined()) { OPENVINO_THROW("Weight memory was undefined."); + } MemoryPtr biasMemPtr = nullptr; if (withBiases) { biasMemPtr = getSrcMemoryAtPort(2); - if (!biasMemPtr || 
!biasMemPtr->isDefined()) + if (!biasMemPtr || !biasMemPtr->isDefined()) { OPENVINO_THROW("Input memory is undefined."); + } } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), "."); + } DnnlMemoryDescCPtr inMemoryDesc = srcMemPtr->getDescWithType(); DnnlMemoryDescCPtr weightMemoryDesc = wghMemPtr->getDescWithType(); @@ -1355,10 +1378,11 @@ void Convolution::prepareParams() { auto initPrimitiveAttr = [&]() { dnnl::primitive_attr attr; - if (preferLegacyZeroPoint) + if (preferLegacyZeroPoint) { addLegacyZeroPoints(attr); - else + } else { addZeroPoints(attr); + } setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), preferLegacyPostOps, true); attr.set_scratchpad_mode(dnnl::scratchpad_mode::user); @@ -1472,8 +1496,9 @@ void Convolution::prepareParams() { key.attr); // unable to create a primitive desc - if (!reorderConvDesc) + if (!reorderConvDesc) { return nullptr; + } if (key.attr.get()->post_ops_.count(dnnl::impl::primitive_kind::sum)) { return std::make_shared(reorderConvDesc, @@ -1499,8 +1524,9 @@ void Convolution::prepareParams() { execPtr = result.first; - if (!execPtr) + if (!execPtr) { OPENVINO_THROW("Primitive descriptor was not found for node ", getName(), "."); + } primArgs[DNNL_ARG_SRC] = srcMemPtr->getPrimitive(); primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); @@ -1523,10 +1549,11 @@ void Convolution::prepareParams() { primArgs[DNNL_ARG_BIAS] = biasMemPtr->getPrimitive(); } - if (preferLegacyZeroPoint) + if (preferLegacyZeroPoint) { appendLegacyZeroPointsArgs(); - else + } else { appendZeroPointsArgs(); + } Node::appendPostOpArgs(*pAttrLocal, primArgs, convPostOpsArgs[preferLegacyPostOps]); @@ -1748,14 +1775,17 @@ void Convolution::appendZeroPointsArgs() { } void Convolution::initializeInputZeroPoints(const uint8_t* inputZpData, const size_t inputZpSize) { - if (!inputZeroPoints.empty() 
|| !legacyInputZeroPoints.empty()) + if (!inputZeroPoints.empty() || !legacyInputZeroPoints.empty()) { OPENVINO_THROW("input zero point is not empty '", getName(), "'"); - if (inputZpSize) + } + if (inputZpSize) { inputZeroPointType = zpType::PerTensor; + } for (size_t j = 0; j < inputZpSize; j++) { legacyInputZeroPoints.push_back(inputZpData[j]); - if (inputZpData[j] != inputZpData[0]) + if (inputZpData[j] != inputZpData[0]) { inputZeroPointType = zpType::PerChannel; + } } // Only enable per-tensor zero point on avx512-amx and avx512-core-vnni, avx2_vnni_2. // avx2_vnni is not enabled per-tensor z because of perf regression brgconv with per-tensor zpcompared with jit @@ -1763,10 +1793,11 @@ void Convolution::initializeInputZeroPoints(const uint8_t* inputZpData, const si // node would determine how to create post-ops attribute and prioritize to choose final onednn kernel. if (inputZeroPointType == zpType::PerTensor && (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_vnni) || - impl::cpu::x64::mayiuse(impl::cpu::x64::avx2_vnni_2))) + impl::cpu::x64::mayiuse(impl::cpu::x64::avx2_vnni_2))) { inputZeroPoints.push_back(static_cast(inputZpData[0])); - else + } else { inputZeroPointType = zpType::PerChannel; + } } VectorDims Convolution::makeInputDummyShape(const Shape& inpShape) const { diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp b/src/plugins/intel_cpu/src/nodes/convert.cpp index f82e6da109672b..59d8b7803eb90d 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/convert.cpp @@ -64,26 +64,32 @@ Convert::Convert(const Shape& shape, void Convert::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data // from correspond tensor descriptors. 
- if (outputShapes.empty()) + if (outputShapes.empty()) { outputShapes.push_back(output->getShape()); - if (inputShapes.empty()) + } + if (inputShapes.empty()) { inputShapes.push_back(input->getShape()); - if (getParentEdges().size() != 1) + } + if (getParentEdges().size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } } bool Convert::isSupportedDesc(const MemoryDesc& desc) { bool isSupported = desc.getType() & MemoryDescType::Blocked; - if (desc.getType() == MemoryDescType::DnnlBlocked) + if (desc.getType() == MemoryDescType::DnnlBlocked) { isSupported &= desc.as()->hasEmptyExtraData(); + } return isSupported; } void Convert::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } NodeConfig config; PortConfig dataIn; @@ -180,8 +186,9 @@ void Convert::execute(const dnnl::stream& strm) { const auto parentPaddElemCount = parentMem.getDescWithType()->getPaddedElementsCount(); const auto childPaddElemCount = childMem.getDescWithType()->getPaddedElementsCount(); - if (parentPaddElemCount != childPaddElemCount) + if (parentPaddElemCount != childPaddElemCount) { THROW_CPU_NODE_ERR("has different elements number in input and output buffers"); + } MemoryCPtr srcMemory = getSrcMemoryAtPort(0); MemoryPtr dstMemory = getDstMemoryAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp index 445309466b2125..acc4670b61b506 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp @@ -35,32 +35,38 @@ CTCGreedyDecoder::CTCGreedyDecoder(const std::shared_ptr& op, const Gr OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (getOriginalInputsNumber() != 2) + if (getOriginalInputsNumber() != 2) { 
THROW_CPU_NODE_ERR("has invalid number of input edges: ", getOriginalInputsNumber()); - if (getOriginalOutputsNumber() != 1) + } + if (getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has invalid number of outputs edges: ", getOriginalOutputsNumber()); + } const auto& dataDims = getInputShapeAtPort(DATA_INDEX).getDims(); const auto& seqDims = getInputShapeAtPort(SEQUENCE_LENGTH_INDEX).getDims(); - if (!dimsEqualWeak(dataDims[0], seqDims[0]) || !dimsEqualWeak(dataDims[1], seqDims[1])) + if (!dimsEqualWeak(dataDims[0], seqDims[0]) || !dimsEqualWeak(dataDims[1], seqDims[1])) { THROW_CPU_NODE_ERR("has invalid input shapes."); + } auto greedyDecOp = ov::as_type_ptr(op); mergeRepeated = greedyDecOp->get_ctc_merge_repeated(); } void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inDataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); - if (!one_of(inDataPrecision, ov::element::f32, ov::element::bf16, ov::element::f16)) + if (!one_of(inDataPrecision, ov::element::f32, ov::element::bf16, ov::element::f16)) { THROW_CPU_NODE_ERR("has unsupported 'data' input precision: ", inDataPrecision); + } ov::element::Type seqLenPrecision = getOriginalInputPrecisionAtPort(SEQUENCE_LENGTH_INDEX); - if (!one_of(seqLenPrecision, ov::element::f32, ov::element::bf16, ov::element::f16)) + if (!one_of(seqLenPrecision, ov::element::f32, ov::element::bf16, ov::element::f16)) { THROW_CPU_NODE_ERR("has unsupported 'sequence_length' input precision: ", seqLenPrecision); + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::f32}}, {{LayoutType::ncsp, ov::element::f32}}, @@ -84,8 +90,9 @@ void CTCGreedyDecoder::execute(const dnnl::stream& strm) { parallel_for(B, [&](size_t b) { size_t t = 0; for (; t < T; t++) { - if (sequenceMask[B * t + b] == 0.f) + if (sequenceMask[B * t + b] == 0.f) { break; + } } 
sequenceLengths[b] = t; }); @@ -102,8 +109,9 @@ void CTCGreedyDecoder::execute(const dnnl::stream& strm) { auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(workAmount, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } size_t tStart = 0lu, bStart = 0lu; for (; bStart < B; bStart++) { tStart += sequenceLengths[bStart]; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp index 5f8713fd71ef3b..570438e55ea98f 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp @@ -35,37 +35,44 @@ CTCGreedyDecoderSeqLen::CTCGreedyDecoderSeqLen(const std::shared_ptr& OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (getOriginalInputsNumber() < 2 || getOriginalInputsNumber() > 3) + if (getOriginalInputsNumber() < 2 || getOriginalInputsNumber() > 3) { THROW_CPU_NODE_ERR("has invalid number of input edges: ", getOriginalInputsNumber()); - if (getOriginalOutputsNumber() != 2) + } + if (getOriginalOutputsNumber() != 2) { THROW_CPU_NODE_ERR("has invalid number of outputs edges: ", getOriginalOutputsNumber()); + } const auto& dataDims = getInputShapeAtPort(DATA_INDEX).getDims(); const auto& seqDims = getInputShapeAtPort(SEQUENCE_LENGTH_INDEX).getDims(); - if (!dimsEqualWeak(dataDims[0], seqDims[0])) + if (!dimsEqualWeak(dataDims[0], seqDims[0])) { THROW_CPU_NODE_ERR("has invalid input shapes."); + } auto greedyDecOp = ov::as_type_ptr(op); mergeRepeated = greedyDecOp->get_merge_repeated(); } void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inDataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); - if (!one_of(inDataPrecision, ov::element::f32, ov::element::bf16, ov::element::f16)) + if 
(!one_of(inDataPrecision, ov::element::f32, ov::element::bf16, ov::element::f16)) { THROW_CPU_NODE_ERR("has unsupported 'data' input precision: ", inDataPrecision); + } ov::element::Type seqLenPrecision = getOriginalInputPrecisionAtPort(SEQUENCE_LENGTH_INDEX); - if (seqLenPrecision != ov::element::i32 && seqLenPrecision != ov::element::i64) + if (seqLenPrecision != ov::element::i32 && seqLenPrecision != ov::element::i64) { THROW_CPU_NODE_ERR("has unsupported 'sequence_length' input precision: ", seqLenPrecision); + } std::vector inDataConf; inDataConf.reserve(inputShapes.size()); inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); - for (size_t i = 1; i < inputShapes.size(); ++i) + for (size_t i = 1; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::i32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}}, @@ -87,8 +94,9 @@ void CTCGreedyDecoderSeqLen::execute(const dnnl::stream& strm) { const size_t TC = T * C; int blankIndex = C - 1; - if (inputShapes.size() > BLANK_INDEX) + if (inputShapes.size() > BLANK_INDEX) { blankIndex = (getSrcDataAtPortAs(BLANK_INDEX))[0]; + } size_t workAmount = 0; for (size_t b = 0; b < B; b++) { @@ -108,8 +116,9 @@ void CTCGreedyDecoderSeqLen::execute(const dnnl::stream& strm) { auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(workAmount, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } size_t tStart = 0lu, bStart = 0lu; for (; bStart < B; bStart++) { tStart += sequenceLengths[bStart]; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp index 67a80745743528..1da1c6d7dfa802 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp @@ -33,8 +33,9 @@ CTCLoss::CTCLoss(const std::shared_ptr& op, const GraphContext::CPtr& 
OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (getOriginalInputsNumber() != 4 && getOriginalInputsNumber() != 5) + if (getOriginalInputsNumber() != 4 && getOriginalInputsNumber() != 5) { THROW_CPU_NODE_ERR("has invalid inputs number."); + } auto ctcLossOp = ov::as_type_ptr(op); ctcMergeRepeated = ctcLossOp->get_ctc_merge_repeated(); @@ -43,14 +44,16 @@ CTCLoss::CTCLoss(const std::shared_ptr& op, const GraphContext::CPtr& } void CTCLoss::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; inDataConf.reserve(inputShapes.size()); inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); - for (size_t i = 1; i < inputShapes.size(); ++i) + for (size_t i = 1; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::i32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, ov::element::f32}}, impl_desc_type::ref_any); } @@ -87,8 +90,9 @@ void CTCLoss::execute(const dnnl::stream& strm) { auto threadBody_1 = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(batchNum, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } for (size_t b = start; b < end; b++) { if (logitsLength[b] < 0 || labelsLength[b] < 0 || logitsLength[b] > static_cast(maxTime) || @@ -155,8 +159,9 @@ void CTCLoss::execute(const dnnl::stream& strm) { if (returnCode != 0) { std::string resErr(""); for (auto& err : errorMsgB) { - if (!err.empty()) + if (!err.empty()) { resErr += err + "\n"; + } } THROW_CPU_NODE_ERR(resErr); } @@ -172,8 +177,9 @@ void CTCLoss::execute(const dnnl::stream& strm) { size_t start(0lu), end(0lu); size_t sB(0lu), sT(0lu); splitter(workAmount2, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } int64_t cw = 0, st = start; for (; sB < batchNum; sB++) { cw += logitsLength[sB]; @@ -220,18 +226,20 @@ void CTCLoss::execute(const dnnl::stream& strm) { } 
else if (log2 == -float_inf) { return log1; } else { - if (log1 > log2) + if (log1 > log2) { return log1 + std::log1pf(std::exp(log2 - log1)); - else + } else { return log2 + std::log1pf(std::exp(log1 - log2)); + } } }; auto threadBody_3 = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(batchNum, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } // As per Connectionist Temporal Classification - Labeling Unsegmented Sequence Data with Recurrent Neural // Networks: Graves et al., 2016, paragraph 4.1 (10) @@ -241,8 +249,9 @@ void CTCLoss::execute(const dnnl::stream& strm) { const int actualLogitLen = logitsLength[b]; const int decodedTargetLen = decodedTargetLenB[b]; std::vector> logBwd(decodedTargetLen, std::vector(actualLogitLen, -float_inf)); - for (int s = decodedTargetLen - 2; s < decodedTargetLen; s++) + for (int s = decodedTargetLen - 2; s < decodedTargetLen; s++) { logBwd[s][actualLogitLen - 1] = 0.f; + } for (int t = actualLogitLen - 2; t >= 0; t--) { const int t_1 = t + 1; diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp index bcedab54caeaeb..dec5a42e14eeb8 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp @@ -38,8 +38,9 @@ CumSum::CumSum(const std::shared_ptr& op, const GraphContext::CPtr& co } if ((getOriginalInputsNumber() != numOfInputs && getOriginalInputsNumber() != (numOfInputs - 1)) || - getOriginalOutputsNumber() != 1) + getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } const auto& dataShape = getInputShapeAtPort(CUM_SUM_DATA); numOfDims = dataShape.getRank(); @@ -48,25 +49,29 @@ CumSum::CumSum(const std::shared_ptr& op, const GraphContext::CPtr& co } const auto cumsum = ov::as_type_ptr(op); - if (cumsum == nullptr) + if (cumsum == nullptr) { OPENVINO_THROW("Operation with name '", op->get_friendly_name(), "' is not an instance 
of CumSum from opset3."); + } exclusive = cumsum->is_exclusive(); reverse = cumsum->is_reverse(); if (getOriginalInputsNumber() == numOfInputs) { const auto axis_shape = cumsum->get_input_partial_shape(AXIS); - if (axis_shape.is_dynamic() || !ov::is_scalar(axis_shape.to_shape())) + if (axis_shape.is_dynamic() || !ov::is_scalar(axis_shape.to_shape())) { THROW_CPU_NODE_ERR("doesn't support 'axis' input tensor with non scalar rank"); + } } - if (dataShape != getOutputShapeAtPort(0)) + if (dataShape != getOutputShapeAtPort(0)) { THROW_CPU_NODE_ERR("has different 'data' input and output dimensions"); + } } void CumSum::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA); if (!one_of(dataPrecision, @@ -78,27 +83,31 @@ void CumSum::initSupportedPrimitiveDescriptors() { ov::element::u64, ov::element::bf16, ov::element::f16, - ov::element::f32)) + ov::element::f32)) { THROW_CPU_NODE_ERR("has unsupported 'data' input precision: ", dataPrecision.get_type_name()); + } if (inputShapes.size() == numOfInputs) { const auto& axisTensorPrec = getOriginalInputPrecisionAtPort(AXIS); - if (axisTensorPrec != ov::element::i32 && axisTensorPrec != ov::element::i64) + if (axisTensorPrec != ov::element::i32 && axisTensorPrec != ov::element::i64) { THROW_CPU_NODE_ERR("has unsupported 'axis' input precision: ", axisTensorPrec.get_type_name()); + } } std::vector inDataConf; inDataConf.reserve(inputShapes.size()); inDataConf.emplace_back(LayoutType::ncsp, dataPrecision); - for (size_t i = 1; i < inputShapes.size(); ++i) + for (size_t i = 1; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::i32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } void CumSum::execute(const dnnl::stream& strm) { - if (inputShapes.size() == numOfInputs) + if (inputShapes.size() 
== numOfInputs) { axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); + } OV_SWITCH(intel_cpu, CumSumExecute, @@ -143,12 +152,15 @@ void CumSum::cumSum(const dataType* input, dataType* output, const VectorDims& s size_t j = 0; const auto& shape = getParentEdgeAt(CUM_SUM_DATA)->getMemory().getStaticDims(); for (size_t i = 0; i < shape.size(); i++) { - if (i == axis) + if (i == axis) { continue; + } iterationRange[j++] = shape[i]; } - size_t work_amount_dst = - std::accumulate(iterationRange.begin(), iterationRange.end(), size_t(1), std::multiplies()); + size_t work_amount_dst = std::accumulate(iterationRange.begin(), + iterationRange.end(), + static_cast(1), + std::multiplies()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; VectorDims counters(numOfDims - 1, 0); @@ -256,8 +268,9 @@ size_t CumSum::getAxis(const IMemory& _axis, const IMemory& _data) const { THROW_CPU_NODE_ERR("doesn't support 'axis' input with precision: ", axisPrecision.get_type_name()); } } - if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1) + if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1) { THROW_CPU_NODE_ERR("has axis with a value out of range: ", axisValueFromBlob); + } return axisValueFromBlob >= 0 ? 
axisValueFromBlob : (axisValueFromBlob + dataShapeSize); } diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 4090244a17ec32..71d3737595f4e2 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -194,8 +194,9 @@ bool Deconvolution::isSupportedOperation(const std::shared_ptr& Deconvolution::Deconvolution(const std::shared_ptr& op, const GraphContext::CPtr& context) : Node(op, context, DeconvolutionShapeInferFactory(op)) { std::string errorMessage; - if (!isSupportedOperation(op, errorMessage)) + if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + } const auto& weightDims = getWeightDims(); @@ -253,8 +254,9 @@ Deconvolution::Deconvolution(const std::shared_ptr& op, const GraphCon externOutShape = inputShapes.size() == 3; biasPort = externOutShape ? 3 : 2; - if (externOutShape && (isConstOutShape = ov::is_type(op->get_input_node_shared_ptr(2)))) + if (externOutShape && (isConstOutShape = ov::is_type(op->get_input_node_shared_ptr(2)))) { lastOutputSpatialDims = ov::as_type(op->get_input_node_ptr(2))->cast_vector(); + } if (externOutShape && isDynamicNode()) { const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || @@ -266,8 +268,9 @@ Deconvolution::Deconvolution(const std::shared_ptr& op, const GraphCon size_t spatialRank = getInputShapeAtPort(0).getRank() - 2; auto weightDimsReversItr = weightDims.crbegin(); is1x1 = true; - for (size_t i = 0; i < spatialRank; ++i) + for (size_t i = 0; i < spatialRank; ++i) { is1x1 = is1x1 && *(weightDimsReversItr++) == 1; + } // 1x1 deconv has some test case failed. The cause is upstream ONEDNN unsupported brgemm implementation cases are // enabled in forked ONEDNNN // https://github.com/openvinotoolkit/oneDNN/blob/117e287000b48a34a7218fcaa274a91571141728/src/common/convolution.cpp#L138. 
@@ -286,8 +289,9 @@ Deconvolution::Deconvolution(const std::shared_ptr& op, const GraphCon void Deconvolution::createDnnlCompatibleWeights() { MemoryPtr blob = getSrcMemoryAtPort(1); - if (!blob) + if (!blob) { OPENVINO_THROW("Cannot get const weights blob for node ", getName(), "."); + } weightIsConst = getParentEdgeAt(1)->getParent()->isConstant(); auto blockedDims = getWeightDims(); @@ -297,8 +301,9 @@ void Deconvolution::createDnnlCompatibleWeights() { } else { order = {1, 0}; } - for (size_t i = 2 + withGroups; i < blockedDims.size(); i++) + for (size_t i = 2 + withGroups; i < blockedDims.size(); i++) { order.push_back(i); + } auto desc = CpuBlockedMemoryDesc(DnnlExtensionUtils::DataTypeToElementType(blob->getDataType()), Shape(dnnlCompatibleWeiDims), @@ -317,8 +322,9 @@ bool Deconvolution::canBeExecutedInInt8() const { return false; } - if (!withGroups && deconvAttrs.stride.back() > 3) + if (!withGroups && deconvAttrs.stride.back() > 3) { return false; + } if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) { const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims(); if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { @@ -330,25 +336,30 @@ bool Deconvolution::canBeExecutedInInt8() const { // heuristicParam = IC^2 * SP size_t heuristicConst = 67108864; auto heuristicParam = IC * IC; - for (size_t i = 2; i < inMaxDims.size(); i++) + for (size_t i = 2; i < inMaxDims.size(); i++) { heuristicParam *= inMaxDims[i]; - if (heuristicParam > heuristicConst) + } + if (heuristicParam > heuristicConst) { return false; + } } for (size_t i = 0; i < deconvAttrs.kernel.size(); i++) { - if (deconvAttrs.kernel[i] < deconvAttrs.stride[i]) + if (deconvAttrs.kernel[i] < deconvAttrs.stride[i]) { return false; + } } // not supported in oneDNN int channelBlock = impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) ? 16 : impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) ? 
8 : 4; - if (withGroups && !isDW && (IC % channelBlock != 0 || OC % channelBlock != 0)) + if (withGroups && !isDW && (IC % channelBlock != 0 || OC % channelBlock != 0)) { return false; - if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) && deconvAttrs.stride.back() > 3) + } + if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) && deconvAttrs.stride.back() > 3) { return false; + } ov::element::Type inPrecision = getOriginalInputPrecisionAtPort(0); auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(inPrecision); @@ -356,15 +367,17 @@ bool Deconvolution::canBeExecutedInInt8() const { ov::element::Type weiPrecision = getOriginalInputPrecisionAtPort(1); auto weightsDataType = DnnlExtensionUtils::ElementTypeToDataType(weiPrecision); - if (isDW && (inputDataType == dnnl_s8 || deconvAttrs.dilation.size() == 3)) + if (isDW && (inputDataType == dnnl_s8 || deconvAttrs.dilation.size() == 3)) { return false; + } return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8; } bool Deconvolution::canFuse(const NodePtr& node) const { - if (canBeExecutedInInt8()) + if (canBeExecutedInInt8()) { return canFuseSimpleOperation(node); + } // Upstream ONEDNN conv_backward_data primitive can't support any post-ops, fork onednn added depthwise support in // conv_backward_data JIT implementation. ONEDNN deconv primitive can support most of post-ops, but the post-ops // implementation details are different. So current deconv implementation list in onednn has 2 kinds of implements: @@ -498,8 +511,9 @@ std::vector Deconvolution::getAvailableFormatsForDims(const } void Deconvolution::getSupportedDescriptors() { - if (!descs.empty()) + if (!descs.empty()) { return; + } isInt8 = canBeExecutedInInt8(); deconvAttrs.withBiasesParam = withBiases = externOutShape ? 
getOriginalInputsNumber() == 4 : getOriginalInputsNumber() == 3; @@ -509,22 +523,28 @@ void Deconvolution::getSupportedDescriptors() { if (isInt8) { // TODO: We have to extend jit_avx512_core_x8s8s32x_deconv_fwd_kernel from oneDNN to support BF16 output data // type - if (ov::element::bf16 == inPrecision) + if (ov::element::bf16 == inPrecision) { inPrecision = ov::element::f32; - if (ov::element::bf16 == outPrecision) + } + if (ov::element::bf16 == outPrecision) { outPrecision = ov::element::f32; + } } else { - if (!inPrecision.is_real()) + if (!inPrecision.is_real()) { inPrecision = ov::element::f32; - if (!outPrecision.is_real()) + } + if (!outPrecision.is_real()) { outPrecision = ov::element::f32; + } } auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(inPrecision); outputDataType = DnnlExtensionUtils::ElementTypeToDataType(outPrecision); - if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) + if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) { inputDataType = outputDataType = memory::data_type::bf16; - if (inputDataType == memory::data_type::f16 || outputDataType == memory::data_type::f16) + } + if (inputDataType == memory::data_type::f16 || outputDataType == memory::data_type::f16) { inputDataType = outputDataType = memory::data_type::f16; + } if (!fusedWith.empty()) { outputDataType = DnnlExtensionUtils::ElementTypeToDataType( fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); @@ -838,8 +858,9 @@ const std::vector& Deconvolution::getDefaultImplPriority() { impl_desc_type::jit_gemm, impl_desc_type::ref_any, impl_desc_type::ref, }; - if (!asymmetricPaddingAnd1x1) + if (!asymmetricPaddingAnd1x1) { return priorities; + } static const std::vector priorities_wo_brgemm = [&] { std::vector result; @@ -882,15 +903,19 @@ void Deconvolution::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto wghMemPtr = getSrcMemoryAtPort(1); auto dstMemPtr = 
getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory is undefined."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { OPENVINO_THROW("Input memory is undefined."); - if (!wghMemPtr || !wghMemPtr->isDefined()) + } + if (!wghMemPtr || !wghMemPtr->isDefined()) { OPENVINO_THROW("Weight memory is undefined."); + } auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), "."); + } if (useACL) { if (isDynamicNode()) { @@ -935,14 +960,16 @@ void Deconvolution::prepareParams() { MemoryPtr biasMemPtr = nullptr; DnnlMemoryDescCPtr biasDesc; - if (!dnnlCompatibleWeights) + if (!dnnlCompatibleWeights) { createDnnlCompatibleWeights(); + } DnnlMemoryDescPtr wghDesc = dnnlCompatibleWeights->getDescWithType(); if (withBiases) { biasMemPtr = getSrcMemoryAtPort(biasPort); - if (!biasMemPtr || !biasMemPtr->isDefined()) + if (!biasMemPtr || !biasMemPtr->isDefined()) { OPENVINO_THROW("Bias memory memory is undefined."); + } biasDesc = biasMemPtr->getDescWithType(); } bool is1x1PaddingAsymmetric = false; @@ -975,8 +1002,9 @@ void Deconvolution::prepareParams() { (one_of(srcDataType, memory::data_type::s8, memory::data_type::u8)) ? memory::data_type::s8 : srcDataType; auto wghDescAny = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(weiDims), weiDataType, memory::format_tag::any); - if (key.bias) + if (key.bias) { dnnlBiasDesc = key.bias->getDnnlDesc(); + } desc = createDescriptorInternal(key.inp0->getDnnlDesc(), wghDescAny, @@ -996,8 +1024,9 @@ void Deconvolution::prepareParams() { while (static_cast(itpd)) { impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); // Skip the brgemm implemenation for asymmetric padding case because of the accuracy issue. 
- if (key.isImplicit1x1PaddingAsymmetric && (impl_type & impl_desc_type::brgconv)) + if (key.isImplicit1x1PaddingAsymmetric && (impl_type & impl_desc_type::brgconv)) { continue; + } if (impl_type == key.implType) { auto prim_desc = deconvolution_forward::primitive_desc(itpd.get()); execPtr = std::make_shared(prim_desc, @@ -1056,8 +1085,9 @@ void Deconvolution::prepareParams() { auto result = cache->getOrCreate(key, builder); execPtr = result.first; - if (!execPtr) + if (!execPtr) { OPENVINO_THROW("Primitive descriptor was not found for node ", getName(), "."); + } primArgs[DNNL_ARG_SRC] = srcMemPtr->getPrimitive(); primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); @@ -1075,8 +1105,9 @@ void Deconvolution::prepareParams() { primArgs[DNNL_ARG_WEIGHTS] = dnnlCompatibleWeights->getPrimitive(); } - if (withBiases) + if (withBiases) { primArgs[DNNL_ARG_BIAS] = biasMemPtr->getPrimitive(); + } Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); @@ -1104,8 +1135,9 @@ void Deconvolution::createDescriptor(const std::vector& inputDesc dnnl::memory::desc bias_candidate; // grouping and autoblocking is not compatible - if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended())) + if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended())) { return; + } AttrPtr attr = initPrimitiveAttr(); if (withBiases) { diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index df2c08a5b9e75b..28724027a6900c 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -54,8 +54,9 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ mov(reg_sampled_offs, ptr[this->param1 + GET_OFF(sampledCoords)]); mov(reg_kernel, ptr[this->param1 + GET_OFF(filt)]); - if (jcp_.with_bias) + if (jcp_.with_bias) { mov(reg_bias, ptr[this->param1 + GET_OFF(bias)]); + } mov(reg_output, ptr[this->param1 + 
GET_OFF(dst)]); mov(reg_input_buffer_temp, ptr[this->param1 + GET_OFF(buf)]); mov(oh_pos_temp, ptr[param1 + GET_OFF(oh_pos)]); @@ -166,8 +167,9 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ L(ow_tail); { - if (jcp_.ow % jcp_.ur_w != 0) + if (jcp_.ow % jcp_.ur_w != 0) { oc_loop(jcp_.ow % jcp_.ur_w); + } } } @@ -207,8 +209,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int ic = 0; ic < ic_step; ic++) { for (int ow = 0; ow < ow_step; ow++) { Vmm vmm_src = get_vmm_src(ow); - size_t inp_off = - (size_t)ow * jcp_.kh * jcp_.kw * jcp_.ic + kh * jcp_.kw * jcp_.ic + kw * jcp_.ic + ic; + size_t inp_off = static_cast(ow) * jcp_.kh * jcp_.kw * jcp_.ic + + kh * jcp_.kw * jcp_.ic + kw * jcp_.ic + ic; uni_vbroadcastss(vmm_src, ptr[aux2_reg_input_buffer + inp_off * jcp_.typesize_in]); } @@ -216,10 +218,11 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int r = 0; r < repeats; r++) { for (int ocb = 0; ocb < oc_blocks_step; ocb++) { Vmm vmm_ker = get_vmm_ker(0); - size_t ker_off = - (size_t)ocb * jcp_.nb_ic * jcp_.kh * jcp_.kw * jcp_.ic_block * jcp_.oc_block + - kh * jcp_.kw * jcp_.ic_block * jcp_.oc_block + kw * jcp_.ic_block * jcp_.oc_block + - ic * jcp_.oc_block + r * jcp_.oc_block / 2; + size_t ker_off = static_cast(ocb) * jcp_.nb_ic * jcp_.kh * jcp_.kw * jcp_.ic_block * + jcp_.oc_block + + kh * jcp_.kw * jcp_.ic_block * jcp_.oc_block + + kw * jcp_.ic_block * jcp_.oc_block + ic * jcp_.oc_block + + r * jcp_.oc_block / 2; uni_vmovups(vmm_ker, ptr[aux2_reg_kernel + ker_off * jcp_.typesize_in]); for (int ow = 0; ow < ow_step; ow++) { @@ -362,8 +365,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ Vmm vmm_v4 = Vmm(xmm_v4.getIdx()); // offsets computation - size_t ind_off_hh = - sampledPointsPerPixel * (((size_t)kh * jcp_.kw + kw) + ow * (jcp_.kh * jcp_.kw)); + size_t ind_off_hh = sampledPointsPerPixel * + ((static_cast(kh) * 
jcp_.kw + kw) + ow * (jcp_.kh * jcp_.kw)); size_t ind_off_hl = ind_off_hh + 1; size_t ind_off_lh = ind_off_hl + 1; size_t ind_off_ll = ind_off_lh + 1; @@ -397,7 +400,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ uni_vbroadcastss(xmm_v4, dword[aux_reg_sampled_wei + ind_off_hh * jcp_.typesize_sampled_wei]); - size_t input_buffer_off = (size_t)kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; + size_t input_buffer_off = static_cast(kh) * jcp_.kw * jcp_.ic + kw * jcp_.ic; uni_vpmovsxdq(xmm_v1_off, xmm_v1_off); uni_vmovq(reg_tmp_64, xmm_v1_off); @@ -473,7 +476,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ uni_vbroadcastss(xmm_v4, dword[aux_reg_sampled_wei + ind_off_hh * jcp_.typesize_sampled_wei]); - size_t input_buffer_off = (size_t)kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; + size_t input_buffer_off = static_cast(kh) * jcp_.kw * jcp_.ic + kw * jcp_.ic; uni_vpmovsxdq(xmm_v1_off, xmm_v1_off); uni_vmovq(reg_tmp_64, xmm_v1_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); @@ -558,7 +561,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ if (jcp_.with_bias) { for (int r = 0; r < repeats; r++) { for (int ocb = 0; ocb < oc_blocks_step; ocb++) { - size_t bias_off = (size_t)ocb * jcp_.oc_block + r * jcp_.oc_block / 2; + size_t bias_off = static_cast(ocb) * jcp_.oc_block + r * jcp_.oc_block / 2; uni_vmovups(Vmm(0), ptr[aux_reg_bias + bias_off * jcp_.typesize_bia]); for (int ow = 0; ow < ow_step; ow++) { @@ -585,11 +588,11 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ Xmm xmm_dst = get_xmm_acc(r * jcp_.ur_w * jcp_.nb_oc_blocking + ow); if (isa == avx512_core) { - size_t out_off = (size_t)ow * jcp_.oc; + size_t out_off = static_cast(ow) * jcp_.oc; uni_vmovups(ptr[aux_reg_output + out_off * jcp_.typesize_out], vmm_dst | ktail_mask); } else { for (int oc = 0; oc < tail_size; oc++) { - size_t out_off = (size_t)ow * jcp_.oc + oc + r * 
(jcp_.oc_block / 2); + size_t out_off = static_cast(ow) * jcp_.oc + oc + r * (jcp_.oc_block / 2); uni_vmovq(reg_tmp_64, xmm_dst); mov(ptr[aux_reg_output + out_off * jcp_.typesize_out], reg_tmp_32); @@ -610,8 +613,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int ocb = 0; ocb < oc_blocks_step; ocb++) { for (int ow = 0; ow < ow_step; ow++) { Vmm vmm_acc = get_vmm_acc(r * jcp_.ur_w * jcp_.nb_oc_blocking + ocb * ow_step + ow); - size_t out_off = - (size_t)ow * jcp_.oc * jcp_.ngroups + ocb * jcp_.oc_block + r * (jcp_.oc_block / 2); + size_t out_off = static_cast(ow) * jcp_.oc * jcp_.ngroups + ocb * jcp_.oc_block + + r * (jcp_.oc_block / 2); uni_vmovups(ptr[aux_reg_output + out_off * jcp_.typesize_out], vmm_acc); } } @@ -775,8 +778,9 @@ DeformableConvolution::DeformableConvolution(const std::shared_ptr& op OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } auto defConvNodeBase = ov::as_type_ptr(op); - if (defConvNodeBase == nullptr) + if (defConvNodeBase == nullptr) { THROW_CPU_NODE_ERR("is not an instance of DeformableConvolutionBase."); + } defConvAttr.group = defConvNodeBase->get_group(); defConvAttr.deformable_group = defConvNodeBase->get_deformable_group(); @@ -796,8 +800,9 @@ DeformableConvolution::DeformableConvolution(const std::shared_ptr& op if (op->get_type_info() == ov::op::v8::DeformableConvolution::get_type_info_static()) { auto defConvNode = ov::as_type_ptr(op); - if (defConvNode == nullptr) + if (defConvNode == nullptr) { THROW_CPU_NODE_ERR("is not an instance of DeformableConvolution from opset8."); + } defConvAttr.with_bilinear_pad = defConvNode->get_bilinear_interpolation_pad(); } else { defConvAttr.with_bilinear_pad = false; @@ -805,10 +810,12 @@ DeformableConvolution::DeformableConvolution(const std::shared_ptr& op } void DeformableConvolution::getSupportedDescriptors() { - if (getParentEdges().size() != 3 && getParentEdges().size() != 4) + if (getParentEdges().size() != 3 && getParentEdges().size() != 4) { 
THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } if (getInputShapeAtPort(DATA_ID).getRank() != 4) { THROW_CPU_NODE_ERR("has unsupported mode. Only 4D blobs are supported as input."); } @@ -824,8 +831,9 @@ void DeformableConvolution::getSupportedDescriptors() { } void DeformableConvolution::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } size_t inputsNumber = getOriginalInputsNumber(); NodeConfig config; @@ -958,9 +966,9 @@ void DeformableConvolution::DefConvExecutor::prepareSamplingWeights(const float* for (int kh = 0; kh < KH; kh++) { for (int kw = 0; kw < KW; kw++) { const size_t data_offset_h_index = - 2 * ((size_t)kh * KW + kw) * offStrides[1] + oh * offStrides[2] + ow * offStrides[3]; - const size_t data_offset_w_index = - (2 * ((size_t)kh * KW + kw) + 1) * offStrides[1] + oh * offStrides[2] + ow * offStrides[3]; + 2 * (static_cast(kh) * KW + kw) * offStrides[1] + oh * offStrides[2] + ow * offStrides[3]; + const size_t data_offset_w_index = (2 * (static_cast(kh) * KW + kw) + 1) * offStrides[1] + + oh * offStrides[2] + ow * offStrides[3]; const float offset_h = data_offset_ptr[data_offset_h_index]; const float offset_w = data_offset_ptr[data_offset_w_index]; float map_h = h_in + kh * (KDH + 1) + offset_h; @@ -1174,7 +1182,7 @@ void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const int deformable_group_index = (IC * g + ic) / channel_per_deformable_group; int sampledCoordIndex = (mb * DGHW + deformable_group_index * HW + oh * OW + ow) * ker_size * sampledPointsPerPixel; - size_t weiIndex = (size_t)g * group_wei_stride + oc * weiStrides[0] + ic * weiStrides[1]; + size_t weiIndex = static_cast(g) * group_wei_stride + oc * weiStrides[0] + ic * weiStrides[1]; for (size_t kh_off = 0; kh_off < KH * weiStrides[2]; 
kh_off += weiStrides[2]) { for (size_t kw_off = 0; kw_off < KW * weiStrides[3]; kw_off += weiStrides[3]) { // check if current addendum marked as equal zero @@ -1223,24 +1231,30 @@ void DeformableConvolution::prepareParams() { auto offMemPtr = getSrcMemoryAtPort(OFF_ID); auto weiMemPtr = getSrcMemoryAtPort(WEI_ID); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined destination memory"); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory"); - if (!offMemPtr || !offMemPtr->isDefined()) + } + if (!offMemPtr || !offMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined offsets shape memory"); - if (!weiMemPtr || !weiMemPtr->isDefined()) + } + if (!weiMemPtr || !weiMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined weights memory"); + } if (getOriginalInputsNumber() > 3) { auto modMemPtr = getSrcMemoryAtPort(MOD_ID); - if (!modMemPtr || !modMemPtr->isDefined()) + if (!modMemPtr || !modMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined modulations memory"); + } } auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { THROW_CPU_NODE_ERR("doesn't have primitive descriptors."); + } auto config = selectedPrimitiveDescriptor->getConfig(); bool withModulation = getParentEdges().size() > 3; @@ -1302,7 +1316,7 @@ void DeformableConvolution::DefConvJitExecutor::exec(const float* src, this->pSampledCoordsVector = pSampledCoordsVector; this->pInterpWeightsVector = pInterpWeightsVector; prepareSamplingWeights(offsets, modulation, false); - size_t buffer_size = (size_t)jcp.nthr * jcp.ur_w * jcp.kh * jcp.kw * jcp.ic * jcp.typesize_in; + size_t buffer_size = static_cast(jcp.nthr) * jcp.ur_w * jcp.kh * jcp.kw * jcp.ic * jcp.typesize_in; std::vector input_buffer(buffer_size, 0); float* input_buffer_ptr = 
input_buffer.data(); @@ -1348,8 +1362,9 @@ void DeformableConvolution::execute(const dnnl::stream& strm) { float* dst = dstMemory.getDataAs(); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { OPENVINO_THROW("Deformable convolution with name '", getName(), "' doesn't have primitive descriptors."); + } auto config = selectedPrimitiveDescriptor->getConfig(); if (execPtr) { diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index ed8f1776d6c974..edba298536dbf6 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -70,12 +70,14 @@ DepthToSpace::DepthToSpace(const std::shared_ptr& op, const GraphConte if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 1 || outputShapes.size() != 1) + if (inputShapes.size() != 1 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } auto depthToSpace = ov::as_type_ptr(op); - if (!depthToSpace) + if (!depthToSpace) { THROW_CPU_NODE_ERR("supports only opset1"); + } const auto modeNgraph = depthToSpace->get_mode(); if (modeNgraph == ov::op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) { @@ -87,18 +89,22 @@ DepthToSpace::DepthToSpace(const std::shared_ptr& op, const GraphConte } attrs.blockSize = depthToSpace->get_block_size(); - if (attrs.blockSize == 0) + if (attrs.blockSize == 0) { THROW_CPU_NODE_ERR("has incorrect block_size parameter is zero!"); + } const size_t srcRank = getInputShapeAtPort(0).getRank(); const size_t dstRank = getOutputShapeAtPort(0).getRank(); - if (srcRank < 3) + if (srcRank < 3) { THROW_CPU_NODE_ERR("has incorrect number of input dimensions"); - if (srcRank > 5) + } + if (srcRank > 5) { THROW_CPU_NODE_ERR("doesn't support dimensions with rank greater than 5"); - if 
(srcRank != dstRank) + } + if (srcRank != dstRank) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions"); + } const size_t nSpatialDims = srcRank - 2; attrs.blockStep = static_cast(std::pow(attrs.blockSize, nSpatialDims)); @@ -107,8 +113,9 @@ DepthToSpace::DepthToSpace(const std::shared_ptr& op, const GraphConte void DepthToSpace::getSupportedDescriptors() {} void DepthToSpace::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type precision = getOriginalInputPrecisionAtPort(0); @@ -142,10 +149,12 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { }; supportedTypes.push_back(LayoutType::nspc); - if (canUseBlocked(8lu)) + if (canUseBlocked(8lu)) { supportedTypes.push_back(LayoutType::nCsp8c); - if (canUseBlocked(16lu)) + } + if (canUseBlocked(16lu)) { supportedTypes.push_back(LayoutType::nCsp16c); + } } supportedTypes.push_back(LayoutType::ncsp); auto creators = BlockedDescCreator::getCommonCreators(); @@ -161,12 +170,15 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { void DepthToSpace::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr) + if (!dstMemPtr) { THROW_CPU_NODE_ERR("has null destination memory"); - if (!srcMemPtr) + } + if (!srcMemPtr) { THROW_CPU_NODE_ERR("has null input memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor"); + } const auto& memoryDesc = srcMemPtr->getDesc(); attrs.dataSize = memoryDesc.getPrecision().size(); @@ -177,8 +189,9 @@ void DepthToSpace::createPrimitive() { : LayoutType::ncsp; if (inputShapesDefined()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -199,8 +212,9 @@ void DepthToSpace::prepareParams() { } 
DepthToSpace::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) { - if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) + if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) { OPENVINO_THROW("DepthToSpace executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts."); + } const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc; @@ -285,15 +299,17 @@ DepthToSpace::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs std::iota(params.src_block_order.begin(), params.src_block_order.end(), 0); std::iota(params.dst_block_order.begin(), params.dst_block_order.end(), 0); - for (size_t i = 0; i < reshapedRank; i++) + for (size_t i = 0; i < reshapedRank; i++) { params.dst_block_dims[i] = params.src_block_dims[params.order[i]]; + } permuteKernel = std::unique_ptr(new PermuteKernel(params)); } void DepthToSpace::DepthToSpaceExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const int MB) { - if (!permuteKernel) + if (!permuteKernel) { OPENVINO_THROW("Could not execute. 
Kernel for Transpose node was not compiled."); + } const uint8_t* srcData = srcMemPtr->getDataAs(); uint8_t* dstData = dstMemPtr->getDataAs(); diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.cpp b/src/plugins/intel_cpu/src/nodes/detection_output.cpp index a730bd2943dc61..f41e0bd1ad58ab 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/detection_output.cpp @@ -55,11 +55,13 @@ DetectionOutput::DetectionOutput(const std::shared_ptr& op, const Grap OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (getOriginalInputsNumber() != 3 && getOriginalInputsNumber() != 5) + if (getOriginalInputsNumber() != 3 && getOriginalInputsNumber() != 5) { THROW_CPU_NODE_ERR("has incorrect number of input edges."); + } - if (getOriginalOutputsNumber() != 1) + if (getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } auto doOp = ov::as_type_ptr(op); auto attributes = doOp->get_attrs(); @@ -98,18 +100,21 @@ void DetectionOutput::prepareParams() { locNumForClasses = isShareLoc ? 
1 : classesNum; const auto& idLocDims = getParentEdgeAt(ID_LOC)->getMemory().getShape().getStaticDims(); - if (priorsNum * locNumForClasses * 4 != static_cast(idLocDims[1])) + if (priorsNum * locNumForClasses * 4 != static_cast(idLocDims[1])) { THROW_CPU_NODE_ERR("has incorrect number of priors, which must match number of location predictions (", priorsNum * locNumForClasses * 4, " vs ", idLocDims[1], ")"); + } - if (priorsNum * classesNum != static_cast(idConfDims.back())) + if (priorsNum * classesNum != static_cast(idConfDims.back())) { THROW_CPU_NODE_ERR("has incorrect number of priors, which must match number of confidence predictions."); + } - if (decreaseClassId && backgroundClassId != 0) + if (decreaseClassId && backgroundClassId != 0) { THROW_CPU_NODE_ERR("cannot use decrease_label_id and background_label_id parameter simultaneously."); + } imgNum = static_cast(idConfDims[0]); @@ -118,8 +123,9 @@ void DetectionOutput::prepareParams() { indicesBuffer.resize(imgNum * classesNum * priorsNum); indices.resize(imgNum * classesNum * priorsNum); // prior info for shared_location - if (isShareLoc) + if (isShareLoc) { confInfoForPrior.resize(imgNum * priorsNum); + } // confs...count...indices for caffe style and sparsity case. 
// caffe: filter(conf_info for sparsity or indices for dense) --> topk(buffer) --> nms(indices) @@ -136,13 +142,15 @@ void DetectionOutput::prepareParams() { } void DetectionOutput::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, ov::element::f32}}, impl_desc_type::ref_any); } @@ -151,10 +159,12 @@ struct ConfidenceComparatorDO { explicit ConfidenceComparatorDO(const float* confDataIn) : confData(confDataIn) {} bool operator()(int idx1, int idx2) { - if (confData[idx1] > confData[idx2]) + if (confData[idx1] > confData[idx2]) { return true; - if (confData[idx1] < confData[idx2]) + } + if (confData[idx1] < confData[idx2]) { return false; + } return idx1 < idx2; } @@ -192,8 +202,9 @@ void DetectionOutput::execute(const dnnl::stream& strm) { ppriors += varianceEncodedInTarget ? 
(n * priorsNum * priorSize) : (2 * n * priorsNum * priorSize); getActualPriorNum(ppriors, numPriorsActualdata, n); } - if (!isPriorsPerImg && imgNum > 1) + if (!isPriorsPerImg && imgNum > 1) { std::fill_n(numPriorsActualdata + 1, imgNum - 1, numPriorsActualdata[0]); + } if (!isSparsityWorthwhile) { confReorderDense(confData, ARMConfData, reorderedConfData); @@ -339,8 +350,9 @@ void DetectionOutput::execute(const dnnl::stream& strm) { int* pbuffer = indicesBufData + off; int* pdetections = detectionsData + n * classesNum + c; - if (!isSparsityWorthwhile) + if (!isSparsityWorthwhile) { confFilterCF(pconfReorder, pindices, pbuffer, pdetections, n); + } const float* pboxes; const float* psizes; @@ -364,8 +376,9 @@ void DetectionOutput::execute(const dnnl::stream& strm) { int* pindices = indicesData + offImg; int* pdetections = detectionsData + n * classesNum; - if (!isSparsityWorthwhile) + if (!isSparsityWorthwhile) { confFilterMX(pconf, ARMConfData, pconfReorder, pindices, pbuffer, pdetections, n); + } const float* pboxes = decodedBboxesData + n * 4 * locNumForClasses * priorsNum; const float* psizes = bboxSizesData + n * locNumForClasses * priorsNum; @@ -461,9 +474,10 @@ inline void DetectionOutput::confFilterMX(const float* confData, int maxCIdx = 0; for (int c = 1; c < classesNum; ++c) { float conf = confData[p * classesNum + c]; - if (isARMPrior) + if (isARMPrior) { conf = (c == backgroundClassId) ? 
1.0f : 0.0f; // still need refresh conf due to read from origin conf + } if (conf >= confidenceThreshold && conf > maxConf) { maxConf = conf; maxCIdx = c; @@ -574,13 +588,15 @@ inline void DetectionOutput::confReorderAndFilterSparsityCF(const float* confDat if (withAddBoxPred) { const bool isARMPrior = ARMConfData[n * priorsNum * 2 + p * 2 + 1] < objScore; bool priorStatusSet = false; - if (isShareLoc) + if (isShareLoc) { confInfoForPrior[offV + p] = -1; + } int confIdxPrior = off + p * classesNum; for (int c = 0; c < classesNum; ++c) { float conf = confData[confIdxPrior + c]; - if (isARMPrior) + if (isARMPrior) { conf = (c == backgroundClassId) ? 1.0f : 0.0f; + } if (conf > confidenceThreshold) { const int idx = offH + c * confInfoLen; reorderedConfData[idx + p] = conf; @@ -597,8 +613,9 @@ inline void DetectionOutput::confReorderAndFilterSparsityCF(const float* confDat } } else { bool priorStatusSet = false; - if (isShareLoc) + if (isShareLoc) { confInfoForPrior[offV + p] = -1; + } int confIdxPrior = off + p * classesNum; for (int c = 0; c < classesNum; ++c) { float conf = confData[confIdxPrior + c]; @@ -621,8 +638,9 @@ inline void DetectionOutput::confReorderAndFilterSparsityCF(const float* confDat parallel_for(classesNum, [&](size_t c) { // in: conf_h info // out: buffer, detectionCount(k) - if (c == static_cast(backgroundClassId)) // Ignore background class + if (c == static_cast(backgroundClassId)) { // Ignore background class return; + } const int countIdx = offH + c * confInfoLen + priorsNum; const int count = reorderedConfDataIndices[countIdx]; const int k = (topK == -1 ? 
count : (std::min)(topK, count)); @@ -650,18 +668,21 @@ inline void DetectionOutput::confReorderAndFilterSparsityMX(const float* confDat std::mutex mtx; parallel_for(numPriorsActual[n], [&](size_t p) { bool isARMPrior = false; - if (withAddBoxPred) + if (withAddBoxPred) { isARMPrior = ARMConfData[n * priorsNum * 2 + p * 2 + 1] < objScore; + } bool priorStatusSet = false; - if (isShareLoc) + if (isShareLoc) { confInfoForPrior[offV + p] = -1; + } float maxConf = -1; int maxCIdx = 0; int confIdxPrior = off + p * classesNum; for (int c = 0; c < classesNum; ++c) { float conf = confData[confIdxPrior + c]; - if (withAddBoxPred && isARMPrior) + if (withAddBoxPred && isARMPrior) { conf = (c == backgroundClassId) ? 1.0f : 0.0f; + } if (conf >= confidenceThreshold) { int idx = off + c * confInfoLen; reorderedConfData[idx + p] = conf; @@ -923,12 +944,13 @@ inline void DetectionOutput::generateOutput(float* reorderedConfData, } int dstDataSize = 0; - if (keepTopK > 0) + if (keepTopK > 0) { dstDataSize = imgNum * keepTopK * DETECTION_SIZE * sizeof(float); - else if (topK > 0) + } else if (topK > 0) { dstDataSize = imgNum * topK * classesNum * DETECTION_SIZE * sizeof(float); - else + } else { dstDataSize = imgNum * classesNum * priorsNum * DETECTION_SIZE * sizeof(float); + } if (static_cast(dstDataSize) > getChildEdgeAt(0)->getMemory().getSize()) { THROW_CPU_NODE_ERR("has insufficient output buffer size."); diff --git a/src/plugins/intel_cpu/src/nodes/dft.cpp b/src/plugins/intel_cpu/src/nodes/dft.cpp index 1d18e1b64d28a1..ab542e5a51de9b 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.cpp +++ b/src/plugins/intel_cpu/src/nodes/dft.cpp @@ -81,8 +81,9 @@ DFT::DFT(const std::shared_ptr& op, const GraphContext::CPtr& context) void DFT::getSupportedDescriptors() {} void DFT::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& dataPrecision = 
getOriginalInputPrecisionAtPort(DATA_INDEX); if (!dataPrecision.is_real()) { @@ -103,8 +104,9 @@ void DFT::initSupportedPrimitiveDescriptors() { std::vector inDataConfigurators( {{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::i32}}); - if (inputShapes.size() > SIGNAL_SIZE_INDEX) + if (inputShapes.size() > SIGNAL_SIZE_INDEX) { inDataConfigurators.push_back({LayoutType::ncsp, ov::element::i32}); + } addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, ov::element::f32}}, impl_desc_type::ref_any); } @@ -207,8 +209,10 @@ void copyDataToOutputWithSignalSize(const float* input, float* output, const std::vector& outputShape, const std::vector& outputStrides) { - auto totalInput = std::accumulate(inputShape.begin(), inputShape.end(), size_t(1), std::multiplies()); - auto totalOutput = std::accumulate(outputShape.begin(), outputShape.end(), size_t(1), std::multiplies()); + auto totalInput = + std::accumulate(inputShape.begin(), inputShape.end(), static_cast(1), std::multiplies()); + auto totalOutput = + std::accumulate(outputShape.begin(), outputShape.end(), static_cast(1), std::multiplies()); std::fill_n(output, totalOutput, 0.f); size_t lastChangedDim = 0; for (size_t index = inputShape.size() - 1; index > 0; --index) { @@ -232,7 +236,7 @@ void copyDataToOutputWithSignalSize(const float* input, const std::vector outputStridesRange(outputStrides.begin(), outputStrides.begin() + iterationRange.size()); const size_t blockSize = std::accumulate(inputShape.begin() + lastChangedDim + 1, inputShape.end(), - size_t(1), + static_cast(1), std::multiplies()); const size_t blockSizeBytes = blockSize * sizeof(float); std::vector iterationCounter(iterationRange.size(), 0); @@ -282,7 +286,7 @@ void DFT::execute(const dnnl::stream& strm) { copyDataToOutputWithSignalSize(src, inputShape, inputStrides, dst, outputShape, outputStrides); } else { auto totalElements = - std::accumulate(inputShape.begin(), inputShape.end(), size_t(1), std::multiplies()); + 
std::accumulate(inputShape.begin(), inputShape.end(), static_cast(1), std::multiplies()); cpu_memcpy(dst, src, totalElements * sizeof(float)); } @@ -593,8 +597,9 @@ void DFT::createJITKernels(bool hasDFT, bool hasFFT) { OPENVINO_THROW("Can't create jit DFT kernel"); } - if (dftKernel) + if (dftKernel) { dftKernel->create_ker(); + } } if (hasFFT && fftKernel == nullptr) { @@ -608,8 +613,9 @@ void DFT::createJITKernels(bool hasDFT, bool hasFFT) { OPENVINO_THROW("Can't create jit FFT kernel"); } - if (fftKernel) + if (fftKernel) { fftKernel->create_ker(); + } } #endif } diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index c13f22b0d9b76a..094ebbff01eb6d 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -296,8 +296,9 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseBitwiseOr, jit_bitwise_or_emitter), OV_CASE(Algorithm::EltwiseBitwiseXor, jit_bitwise_xor_emitter)); - if (precisions.empty()) + if (precisions.empty()) { OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); + } return precisions; } @@ -411,37 +412,45 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener Xbyak::Label tail_loop_label; Xbyak::Label tail_loop_end_label; - if (isa == x64::avx512_core) + if (isa == x64::avx512_core) { vpxord(vmm_zero, vmm_zero, vmm_zero); + } for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] == 1) + if (jep.src_size[i] == 1) { load_vector(get_vmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc, true); + } } size_t min_src_size = jep.dst_size; for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) + if (jep.src_size[i] != 1) { min_src_size = std::min(min_src_size, jep.src_size[i]); + } } - if (jep_.oc_size > 1) + if (jep_.oc_size > 1) { min_src_size = std::min(min_src_size, jep_.oc_size); + } if (min_src_size != jep.dst_size) { bool 
is_valid_configuration = true; - if (jep.dst_size % min_src_size != 0) + if (jep.dst_size % min_src_size != 0) { is_valid_configuration = false; + } for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1 && jep.src_size[i] != min_src_size && jep.src_size[i] != jep.dst_size) + if (jep.src_size[i] != 1 && jep.src_size[i] != min_src_size && jep.src_size[i] != jep.dst_size) { is_valid_configuration = false; + } } - if (jep_.oc_size > 1 && jep_.oc_size != min_src_size && jep_.oc_size != jep.dst_size) + if (jep_.oc_size > 1 && jep_.oc_size != min_src_size && jep_.oc_size != jep.dst_size) { is_valid_configuration = false; + } - if (!is_valid_configuration) + if (!is_valid_configuration) { OPENVINO_THROW("Eltwise jitter has invalid configuration for Eltwise node"); + } L(unroll_loop_label); { @@ -453,12 +462,13 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener for (size_t j = 0; j < min_src_size / vec_step; j++) { for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) + if (jep.src_size[i] != 1) { load_vector(get_vmm_reg(i), ptr[get_src_reg(i) + j * vec_step * jep.src_prc[i].size()], jep.src_prc[i], exec_prc, false); + } } compute_eltwise_op(); @@ -471,11 +481,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener size_t tail_start = min_src_size - min_src_size % vec_step; for (size_t j = tail_start; j < min_src_size; j++) { for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) + if (jep.src_size[i] != 1) { load_scalar(get_xmm_reg(i), ptr[get_src_reg(i) + j * jep.src_prc[i].size()], jep.src_prc[i], exec_prc); + } } compute_eltwise_op(); @@ -489,14 +500,17 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener jep.do_output_saturation); } - for (size_t i = 0; i < jep.inputs_number; i++) - if (jep.src_size[i] == jep.dst_size) + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] == jep.dst_size) { 
add(get_src_reg(i), jep.src_prc[i].size() * loop_step); + } + } add(reg_dst, jep.dst_prc.size() * loop_step); sub(reg_work_amount, loop_step); - if (jep_.oc_size > 1 && jep_.oc_size != min_src_size) + if (jep_.oc_size > 1 && jep_.oc_size != min_src_size) { add(reg_oc_off, loop_step * sizeof(float)); + } jmp(unroll_loop_label, T_NEAR); } @@ -513,8 +527,9 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener jl(main_loop_end_label, T_NEAR); for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) + if (jep.src_size[i] != 1) { load_vector(get_vmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc, false); + } } compute_eltwise_op(); @@ -523,14 +538,17 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener store_vector(ptr[reg_dst], vmm_dst, exec_prc, jep.dst_prc); - for (size_t i = 0; i < jep.inputs_number; i++) - if (jep.src_size[i] != 1) + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { add(get_src_reg(i), jep.src_prc[i].size() * loop_step); + } + } add(reg_dst, jep.dst_prc.size() * loop_step); sub(reg_work_amount, loop_step); - if (jep_.oc_size > 1) + if (jep_.oc_size > 1) { add(reg_oc_off, loop_step * sizeof(float)); + } jmp(main_loop_label, T_NEAR); } @@ -546,8 +564,9 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener jl(tail_loop_end_label, T_NEAR); for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) + if (jep.src_size[i] != 1) { load_scalar(get_xmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc); + } } compute_eltwise_op(); @@ -556,14 +575,17 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener store_scalar(ptr[reg_dst], xmm_dst, exec_prc, jep.dst_prc, jep.do_output_saturation); - for (size_t i = 0; i < jep.inputs_number; i++) - if (jep.src_size[i] != 1) + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { add(get_src_reg(i), 
jep.src_prc[i].size() * loop_step); + } + } add(reg_dst, jep.dst_prc.size() * loop_step); sub(reg_work_amount, loop_step); - if (jep_.oc_size > 1) + if (jep_.oc_size > 1) { add(reg_oc_off, loop_step * sizeof(float)); + } jmp(tail_loop_label, T_NEAR); } @@ -572,8 +594,9 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener this->postamble(); - if (uni_vcvtneps2bf16) + if (uni_vcvtneps2bf16) { uni_vcvtneps2bf16->emit_data(); + } eltwise_emitter->emit_data(); for (size_t i = 0; i < post_op_emitters.size(); i++) { @@ -696,8 +719,9 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener OV_CASE(Algorithm::EltwiseBitwiseOr, jit_bitwise_or_emitter), OV_CASE(Algorithm::EltwiseBitwiseXor, jit_bitwise_xor_emitter)); - if (!ctx.emitter) + if (!ctx.emitter) { OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); + } return ctx.emitter; } @@ -705,10 +729,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener inline void compute_eltwise_op() { std::vector in_idxs; std::vector aux_idxs; - for (size_t i = 0; i < eltwise_emitter->get_inputs_num(); i++) + for (size_t i = 0; i < eltwise_emitter->get_inputs_num(); i++) { in_idxs.push_back(get_vmm_reg(i).getIdx()); - for (size_t i = 0; i < eltwise_emitter->aux_vecs_count(); i++) + } + for (size_t i = 0; i < eltwise_emitter->aux_vecs_count(); i++) { aux_idxs.push_back(get_aux_vmm(i).getIdx()); + } std::vector out_idxs; out_idxs.push_back(vmm_dst.getIdx()); @@ -725,10 +751,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener std::vector in_idxs; std::vector aux_idxs; in_idxs.push_back(vmm_dst.getIdx()); - for (size_t j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_num(); j++) + for (size_t j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_num(); j++) { in_idxs.push_back(get_vmm_reg(input_idx++).getIdx()); - for (size_t j = 0; j < 
post_op_emitters[eltwise_post_op_idx]->aux_vecs_count(); j++) + } + for (size_t j = 0; j < post_op_emitters[eltwise_post_op_idx]->aux_vecs_count(); j++) { aux_idxs.push_back(get_aux_vmm(j).getIdx()); + } std::vector out_idxs; out_idxs.push_back(vmm_dst.getIdx()); @@ -826,12 +854,14 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener switch (dst_prc) { case ov::element::f32: - if (!src_prc.is_real()) + if (!src_prc.is_real()) { uni_vcvtdq2ps(vmm_src, vmm_src); + } break; case ov::element::i32: - if (src_prc.is_real()) + if (src_prc.is_real()) { uni_vcvtps2dq(vmm_src, vmm_src); + } break; default: OPENVINO_THROW("unknown dst_prc"); @@ -901,12 +931,14 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener switch (dst_prc) { case ov::element::f32: - if (!src_prc.is_real()) + if (!src_prc.is_real()) { uni_vcvtdq2ps(xmm_src, xmm_src); + } break; case ov::element::i32: - if (src_prc.is_real()) + if (src_prc.is_real()) { uni_vcvtps2dq(xmm_src, xmm_src); + } break; default: OPENVINO_THROW("unknown dst_prc"); @@ -927,12 +959,14 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener switch (src_prc) { case ov::element::f32: - if (!dst_prc.is_real()) + if (!dst_prc.is_real()) { uni_vcvtps2dq(vmm_dst, vmm_dst); + } break; case ov::element::i32: - if (dst_prc.is_real()) + if (dst_prc.is_real()) { uni_vcvtdq2ps(vmm_dst, vmm_dst); + } break; default: OPENVINO_THROW("unknown src_prc"); @@ -989,13 +1023,15 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener vpmovsdb(op, vmm_dst); } else { uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) + if (isa != x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) + if (isa != x64::sse41) { vmovq(op, xmm_dst); - else + } else { movd(op, xmm_dst); + } } break; case ov::element::u8: @@ -1004,13 +1040,15 @@ struct jit_uni_eltwise_generic : public 
jit_uni_eltwise_kernel, public jit_gener vpmovusdb(op, vmm_dst); } else { uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) + if (isa != x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) + if (isa != x64::sse41) { vmovq(op, xmm_dst); - else + } else { movd(op, xmm_dst); + } } break; default: @@ -1040,12 +1078,14 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener switch (src_prc) { case ov::element::f32: - if (!dst_prc.is_real()) + if (!dst_prc.is_real()) { uni_vcvtps2dq(xmm_dst, xmm_dst); + } break; case ov::element::i32: - if (dst_prc.is_real()) + if (dst_prc.is_real()) { uni_vcvtdq2ps(xmm_dst, xmm_dst); + } break; default: OPENVINO_THROW("unknown src_prc"); @@ -1057,11 +1097,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vmovss(op, xmm_dst); break; case ov::element::bf16: - if (do_output_saturation) + if (do_output_saturation) { uni_vpsrld(xmm_dst, xmm_dst, 16); - else + } else { uni_vcvtneps2bf16->emit_code({static_cast(xmm_dst.getIdx())}, {static_cast(xmm_dst.getIdx())}); + } uni_vpextrw(op, xmm_dst, 0x0); break; case ov::element::f16: @@ -1112,10 +1153,11 @@ Eltwise::BroadcastingPolicy Eltwise::determineBroadcastingPolicy(const std::shar } auto const_shape = op->get_input_shape(constPort); - if (ov::shape_size(const_shape) == 1) + if (ov::shape_size(const_shape) == 1) { return PerTensor; - else + } else { return PerChannel; + } } const std::map& Eltwise::getInitializers() { @@ -1429,8 +1471,9 @@ struct EltwiseKey { for (size_t i = 0; i < inpDims.size(); ++i) { broadcast = (inpDims[i].back() == 1); rhsBroadcast = (rhs.inpDims[i].back() == 1); - if (broadcast != rhsBroadcast) + if (broadcast != rhsBroadcast) { return false; + } } } else { result = result && outOrder == rhs.outOrder && outBlkDims == rhs.outBlkDims; @@ -1438,8 +1481,9 @@ struct EltwiseKey { result = result && (inpDims[i] == 
rhs.inpDims[i]); } } - if (doOutputSaturation != rhs.doOutputSaturation) + if (doOutputSaturation != rhs.doOutputSaturation) { return false; + } } return result; @@ -1490,11 +1534,12 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { auto collapseLastOffsets = [](std::vector& dims, int dimsToCollapse) { for (size_t i = dims.size() - 2; i > dims.size() - dimsToCollapse - 2; i--) { - if (dims[dims.size() - 1] > 0 || dims[i] > 0) + if (dims[dims.size() - 1] > 0 || dims[i] > 0) { dims[dims.size() - 1] = std::max(dims[dims.size() - 1], static_cast(1)) * std::max(dims[i], static_cast(1)); - else + } else { dims[dims.size() - 1] *= dims[i]; + } } for (int i = dims.size() - 2; i >= dimsToCollapse; i--) { @@ -1540,8 +1585,9 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { for (size_t i = 0; i < inpDims.size(); i++) { for (size_t j = 0; j < inpDims[i].size(); j++) { - if (inpDims[i][j] != jep.dims[j] && inpDims[i][j] != 1) + if (inpDims[i][j] != jep.dims[j] && inpDims[i][j] != 1) { OPENVINO_THROW("Eltwise executor got invalid input/output dims configuration."); + } } } @@ -1585,8 +1631,9 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { bool hasDifferentDims = false; while (!useRuntimePtrs && currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount) { - if (collapsedDims >= maxCollapsedDims) + if (collapsedDims >= maxCollapsedDims) { break; + } for (size_t j = 1; j < inpDims.size(); j++) { if (inpDims[j].back() != inpDims[0].back()) { @@ -1690,13 +1737,15 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { } #endif // OPENVINO_ARCH_ARM64 - if (_pKernel) + if (_pKernel) { _pKernel->create_ker(); + } } void exec(const jit_eltwise_call_args_ptrs& args_ptrs, const VectorDims& dims_out) override { - if (!_pKernel) + if (!_pKernel) { OPENVINO_THROW("Can't execute, kernel for eltwise node is not compiled"); + } if (_pKernel->jep_.input_size == optimalTensorRank) { // execute Optimized 6D @@ 
-1736,8 +1785,9 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { tmp /= dims_out[j]; } - for (size_t j = 0; j < counters.size(); j++) + for (size_t j = 0; j < counters.size(); j++) { args.indexes[j] = counters[j]; + } (*_pKernel)(&args_ptrs, &args); } @@ -1745,8 +1795,9 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { } } const VectorDims& getOutDims() const override { - if (!_pKernel) + if (!_pKernel) { OPENVINO_THROW("Can't get jit eltwise params, kernel for Eltwise executor is not compiled"); + } return _pKernel->jep_.dims; } size_t getBatchDimIdx() const override { @@ -1906,7 +1957,7 @@ class EltwiseRefExecutor : public EltwiseRefBaseExecutor { T* dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr); uint32_t count_of_power_values = 1; - for (unsigned long i : this->_inpDims[1]) { + for (uint64_t i : this->_inpDims[1]) { count_of_power_values *= i; } @@ -2310,10 +2361,12 @@ bool Eltwise::isWithBroadcast() { } void Eltwise::getSupportedDescriptors() { - if (getParentEdges().size() < 1) + if (getParentEdges().size() < 1) { OPENVINO_THROW("Incorrect number of input edges for layer ", getName()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { OPENVINO_THROW("Incorrect number of output edges for layer ", getName()); + } } void Eltwise::initSupportedPrimitiveDescriptors() { @@ -2342,10 +2395,11 @@ void Eltwise::initSupportedPrimitiveDescriptors() { ov::element::f16, ov::element::i32}; - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } - // if dim rank is greater than the maximum possible, we should use the reference execution + // if dim rank is greater than the maximum possible, we should use the reference execution #if defined(OPENVINO_ARCH_ARM64) bool canUseOptimizedImpl = mayiuse(dnnl::impl::cpu::aarch64::asimd) && (getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK); @@ -2374,7 +2428,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() { 
expectedInputsNum += eltwiseNode->getOpInputsNum() - 1; } } - if (getParentEdges().size() > MAX_ELTWISE_INPUTS) + if (getParentEdges().size() > MAX_ELTWISE_INPUTS) { OPENVINO_THROW("Eltwise node with name `", getName(), "` doesn't support more than ", @@ -2382,8 +2436,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() { " inputs (actual = ", getParentEdges().size(), ")"); + } - if (expectedInputsNum != getParentEdges().size()) + if (expectedInputsNum != getParentEdges().size()) { OPENVINO_THROW("Eltwise node with name `", getName(), "` has invalid input number of inputs: expected = ", @@ -2391,6 +2446,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() { " (actual = ", getParentEdges().size(), ")"); + } std::vector inputPrecisions; for (const auto& prec : getOriginalInputPrecisions()) { @@ -2400,8 +2456,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() { for (auto& fusedNode : fusedWith) { if (fusedNode->getType() == Type::Eltwise) { for (int i = 0; i < static_cast(fusedNode->getOriginalInputsNumber()); i++) { - if (fusedNode->getFusingPort() != i) + if (fusedNode->getFusingPort() != i) { inputPrecisions.push_back(fusedNode->getOriginalInputPrecisionAtPort(i)); + } } } #ifndef OPENVINO_ARCH_ARM64 @@ -2411,8 +2468,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() { #endif } - if (inputPrecisions.size() != getParentEdges().size()) + if (inputPrecisions.size() != getParentEdges().size()) { OPENVINO_THROW("Eltwise node with name `", getName(), "` has invalid input precisions configuration."); + } ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(0); if (!fusedWith.empty()) { @@ -2426,12 +2484,15 @@ void Eltwise::initSupportedPrimitiveDescriptors() { if (!hasHardwareSupport(ov::element::bf16)) { bool hasBF16 = false; - for (auto& inPrc : inputPrecisions) - if (inPrc == ov::element::bf16) + for (auto& inPrc : inputPrecisions) { + if (inPrc == ov::element::bf16) { hasBF16 = true; + } + } - if (outputPrecision == ov::element::bf16 || 
hasBF16) + if (outputPrecision == ov::element::bf16 || hasBF16) { OPENVINO_THROW("Eltwise node with name `", getName(), "` doesn't support BF16 precision on this target."); + } } # if defined(OV_CPU_WITH_ACL) const bool useJit = false; @@ -2598,8 +2659,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() { BlockedMemoryDesc::CmpMask inputMask = BlockedMemoryDesc::SKIP_OFFSET_MASK; PortConfig portConfig; // TODO [DS]: inplace - if (!isDynamicNode()) + if (!isDynamicNode()) { portConfig.inPlace((!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 0 : -1); + } portConfig.constant(false); const auto& srcShape = getInputShapeAtPort(i); @@ -2695,9 +2757,10 @@ void Eltwise::initSupportedPrimitiveDescriptors() { isBlockedApplicable = isBlockedApplicable && implication(inShape.getRank() != 1, getOutputShapeAtPort(0).getRank() == inShape.getRank()); - if (isDynamicNode() && inShape.getRank() != 1) + if (isDynamicNode() && inShape.getRank() != 1) { isBlockedApplicable = isBlockedApplicable && inShape.getMinDims()[1] != Shape::UNDEFINED_DIM && inShape.getMinDims()[1] > 1; + } } inputNum = getParentEdges().size(); @@ -2750,24 +2813,29 @@ void Eltwise::initSupportedPrimitiveDescriptors() { #endif if (context->getConfig().modelType == Config::ModelType::CNN) { - if (isChannelsFirstApplicable) + if (isChannelsFirstApplicable) { supportedPrimitiveDescriptors.emplace_back(initDesc(ChannelsFirst)); - if (isBlockedApplicable) + } + if (isBlockedApplicable) { supportedPrimitiveDescriptors.emplace_back(initDesc(Blocked)); + } supportedPrimitiveDescriptors.emplace_back(initDesc(Planar)); } else { supportedPrimitiveDescriptors.emplace_back(initDesc(Planar)); - if (isChannelsFirstApplicable) + if (isChannelsFirstApplicable) { supportedPrimitiveDescriptors.emplace_back(initDesc(ChannelsFirst)); - if (isBlockedApplicable) + } + if (isBlockedApplicable) { supportedPrimitiveDescriptors.emplace_back(initDesc(Blocked)); + } } } void Eltwise::createPrimitive() { if 
(memPtrs.empty()) { - for (size_t i = 0; i < inputNum; i++) + for (size_t i = 0; i < inputNum; i++) { memPtrs.push_back(getSrcMemoryAtPort(i)); + } memPtrs.push_back(getDstMemoryAtPort(0)); } @@ -2835,8 +2903,9 @@ void Eltwise::prepareParams() { // WA to handle nspc layout with 1D tensors if (1 == inRank) { - if (outRank > 2 && 1 == outOrder.back()) + if (outRank > 2 && 1 == outOrder.back()) { startOff = 1; + } } for (size_t j = 0; j < inRank; j++) { @@ -2959,8 +3028,10 @@ void Eltwise::prepareParams() { bool Eltwise::needPrepareParams() const { for (size_t i = 0; i < getParentEdges().size(); i++) { - if (getParentEdgeAt(i)->getMemory().getDescWithType()->getBlockDims() != currentInBlkDims[i]) + if (getParentEdgeAt(i)->getMemory().getDescWithType()->getBlockDims() != + currentInBlkDims[i]) { return true; + } } return false; } @@ -2974,8 +3045,9 @@ void Eltwise::execute(const dnnl::stream& strm) { jit_eltwise_call_args_ptrs args_ptrs = {}; VectorDims dims_out = implType == EltwiseImplType::optimizedShapeAgnostic ? 
execParams.outDims : execPtr->getOutDims(); - for (size_t i = 0; i < memPtrs.size() - 1; i++) + for (size_t i = 0; i < memPtrs.size() - 1; i++) { args_ptrs.src_ptr[i] = memPtrs[i]->getDataAs() + start_offset_in[i]; + } args_ptrs.dst_ptr = memPtrs.back()->getDataAs() + start_offset_out; args_ptrs.post_op_data = fqDataPtrs.data(); @@ -3018,15 +3090,17 @@ bool Eltwise::canBeInPlace() const { for (auto& parentEdge : getParentEdges()) { auto parent = parentEdge.lock()->getParent(); - if (parent->getChildEdges().size() != 1) + if (parent->getChildEdges().size() != 1) { return false; + } // WA to prevent memory corruption caused by inplace feature if (parent->getType() == Type::Concatenation) { for (auto& parentParentEdge : parent->getParentEdges()) { auto parentParent = parentParentEdge.lock()->getParent(); - if (parentParent->getChildEdges().size() != 1) + if (parentParent->getChildEdges().size() != 1) { return false; + } } } } @@ -3140,8 +3214,9 @@ void Eltwise::appendPostOpsImpl(dnnl::post_ops& ops, depthwiseData.resize(depthwiseDataSize + bufferPaddingSize, 0); } - if (depthwiseData.empty()) + if (depthwiseData.empty()) { THROW_CPU_NODE_ERR("cannot be performed since buffers are not allocated"); + } std::array offsets = {0}; offsets[1] = offsets[0] + channelSize; @@ -3243,8 +3318,9 @@ bool Eltwise::appendAttrPostOps(DnnlPostOpsComposerLegacy& dnnlpoc, } break; case Algorithm::EltwisePrelu: - if (!allowBinary) + if (!allowBinary) { return false; + } dnnlpoc.appendBinary(dnnl::algorithm::binary_prelu, scales); break; default: @@ -3317,8 +3393,9 @@ bool Eltwise::canFuse(const NodePtr& node) const { return false; } #else - if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) + if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) { return false; + } #endif // TODO: EltwiseLog is supported only via reference executor @@ -3342,13 +3419,15 @@ bool Eltwise::canFuse(const NodePtr& node) const { } bool 
isIntegerNode = isIntegerComputeSupported(this); - if (isIntegerNode && node->getType() != Type::Eltwise) + if (isIntegerNode && node->getType() != Type::Eltwise) { return false; + } // FQ inputs with quantization parameters will be hided inside post_op object, so will not increase inputs number size_t addedInputEdgesNum = node->getType() != Type::FakeQuantize ? (node->getParentEdges().size() - 1) : 0; - if (getParentEdges().size() + addedInputEdgesNum > MAX_ELTWISE_INPUTS) + if (getParentEdges().size() + addedInputEdgesNum > MAX_ELTWISE_INPUTS) { return false; + } if (node->getType() == Type::Eltwise) { // [WA] Since execution precision change from I32 to FP32 for arithmetic operations may lead to incorrect @@ -3389,8 +3468,9 @@ bool Eltwise::canFuse(const NodePtr& node) const { // We can use optimized execution with fusions only in cases when dim rank is less or equal to the maximum // possible - if (node->getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) + if (node->getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) { return false; + } return true; } diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag.cpp index 7727df6a32e5c4..c08234abdc96f9 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag.cpp @@ -27,14 +27,16 @@ EmbeddingBag::EmbeddingBag(const std::shared_ptr& op, DEFAULT_INDEX_IDX(defaultIndexIdx), _layerName(op->get_friendly_name()) { std::string logPrefix = std::string("Layer EmbeddingBag with name '") + _layerName + "' "; - if (op->get_input_size() < requiredInputNum || op->get_output_size() != 1) + if (op->get_input_size() < requiredInputNum || op->get_output_size() != 1) { OPENVINO_THROW(logPrefix, "has incorrect number of input or output edges!"); + } if ((op->get_input_size() > PER_SAMPLE_WEIGHTS_IDX)) { _withWeights = true; } if (_withWeights) { - if (op->get_input_shape(PER_SAMPLE_WEIGHTS_IDX) != 
op->get_input_shape(INDICES_IDX)) + if (op->get_input_shape(PER_SAMPLE_WEIGHTS_IDX) != op->get_input_shape(INDICES_IDX)) { OPENVINO_THROW(logPrefix, "must have equal shapes for indices and per_sample_weights inputs."); + } } } @@ -60,8 +62,9 @@ void EmbeddingBag::processData(const T* srcData, auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(outputBagsNum, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } size_t indicesSize = 0lu; const int* indices = nullptr; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offsets.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_offsets.cpp index 00be05c6cb43b8..f76bbef120a225 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offsets.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offsets.cpp @@ -53,16 +53,19 @@ EmbeddingBagOffset::EmbeddingBagOffset(const std::shared_ptr& op, cons ov::as_string(offsets_op->get_reduction())); } } - if (getInputShapeAtPort(INDICES_IDX).getRank() != 1ul) + if (getInputShapeAtPort(INDICES_IDX).getRank() != 1ul) { OPENVINO_THROW("'", _layerName, "' layer has indices data with invalid rank."); + } - if (getInputShapeAtPort(OFFSETS_IDX).getRank() != 1ul) + if (getInputShapeAtPort(OFFSETS_IDX).getRank() != 1ul) { OPENVINO_THROW("'", _layerName, "' layer's offsets data has invalid rank."); + } } void EmbeddingBagOffset::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::string logPrefix = std::string("Layer EmbeddingBag with name '") + _layerName + "' "; static const std::set supportedPrecisions = {ov::element::f32, @@ -71,27 +74,32 @@ void EmbeddingBagOffset::initSupportedPrimitiveDescriptors() { ov::element::i32}; auto inDataPrecision = getOriginalInputPrecisionAtPort(EMB_TABLE_IDX); - if (one_of(inDataPrecision, ov::element::bf16, ov::element::f16)) + if (one_of(inDataPrecision, ov::element::bf16, 
ov::element::f16)) { inDataPrecision = ov::element::f32; + } if (!supportedPrecisions.empty()) { - if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) + if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) { OPENVINO_THROW(logPrefix, "has unsupported precision: ", inDataPrecision.get_type_name()); + } } else { static const std::set defaultSupportedPrecisions = {ov::element::f32, ov::element::i8, ov::element::u8, ov::element::i32}; - if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) + if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) { OPENVINO_THROW(logPrefix, "has unsupported precision: ", inDataPrecision.get_type_name()); + } } std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, {LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}}); - if (inputShapes.size() > DEFAULT_INDEX_IDX) + if (inputShapes.size() > DEFAULT_INDEX_IDX) { inDataConfigurators.push_back({LayoutType::ncsp, ov::element::i32}); - if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) + } + if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) { inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); + } addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -127,10 +135,11 @@ void EmbeddingBagOffset::getIndices(size_t embIndex, size = 0lu; withWeight = _withWeights; - if (embIndex == _offsetsLen - 1lu) + if (embIndex == _offsetsLen - 1lu) { size = _indicesLen - offsetsData_[embIndex]; - else + } else { size = offsetsData_[embIndex + 1lu] - offsetsData_[embIndex]; + } if (size != 0lu) { indices = indicesData_ + offsetsData_[embIndex]; @@ -144,8 +153,9 @@ void EmbeddingBagOffset::getIndices(size_t embIndex, return; } - if (withWeight) + if (withWeight) { weightsIdx = offsetsData_[embIndex]; + } } void EmbeddingBagOffset::executeDynamicImpl(const dnnl::stream& strm) { @@ -159,8 
+169,9 @@ bool EmbeddingBagOffset::isExecutable() const { void EmbeddingBagOffset::execute(const dnnl::stream& strm) { const auto* srcData = getSrcDataAtPortAs(0); const uint8_t* weightsData = nullptr; - if (_withWeights) + if (_withWeights) { weightsData = getSrcDataAtPortAs(PER_SAMPLE_WEIGHTS_IDX); + } const auto& inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBag::execute(srcData, diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed.cpp index 2f56e2f7b3c3a4..21650ad3e6319d 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed.cpp @@ -53,13 +53,15 @@ EmbeddingBagPacked::EmbeddingBagPacked(const std::shared_ptr& op, cons ov::as_string(packed_op->get_reduction())); } } - if (getInputShapeAtPort(INDICES_IDX).getRank() != 2ul) + if (getInputShapeAtPort(INDICES_IDX).getRank() != 2ul) { OPENVINO_THROW("'", _layerName, "' layer has indices data with invalid rank."); + } } void EmbeddingBagPacked::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::string logPrefix = std::string("Layer EmbeddingBag with name '") + _layerName + "' "; static const std::set supportedPrecisions = {ov::element::f32, @@ -68,24 +70,28 @@ void EmbeddingBagPacked::initSupportedPrimitiveDescriptors() { ov::element::i32}; auto inDataPrecision = getOriginalInputPrecisionAtPort(EMB_TABLE_IDX); - if (one_of(inDataPrecision, ov::element::bf16, ov::element::f16)) + if (one_of(inDataPrecision, ov::element::bf16, ov::element::f16)) { inDataPrecision = ov::element::f32; + } if (!supportedPrecisions.empty()) { - if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) + if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) { OPENVINO_THROW(logPrefix, "has unsupported precision: ", inDataPrecision.get_type_name()); + } } 
else { static const std::set defaultSupportedPrecisions = {ov::element::f32, ov::element::i8, ov::element::u8, ov::element::i32}; - if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) + if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) { OPENVINO_THROW(logPrefix, "has unsupported precision: ", inDataPrecision.get_type_name()); + } } std::vector inDataConfigurators( {{LayoutType::ncsp, inDataPrecision}, {LayoutType::ncsp, ov::element::i32}}); - if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) + if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) { inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); + } addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -105,8 +111,9 @@ void EmbeddingBagPacked::getIndices(size_t embIndex, size_t& size, int& weightsIdx, bool& withWeight) { - if (static_cast(embIndex) >= _batch * _indicesPerBag) + if (static_cast(embIndex) >= _batch * _indicesPerBag) { OPENVINO_THROW("Invalid embedding bag index."); + } withWeight = true; @@ -127,8 +134,9 @@ bool EmbeddingBagPacked::isExecutable() const { void EmbeddingBagPacked::execute(const dnnl::stream& strm) { const auto* srcData = getSrcDataAtPortAs(0); const uint8_t* weightsData = nullptr; - if (_withWeights) + if (_withWeights) { weightsData = getSrcDataAtPortAs(PER_SAMPLE_WEIGHTS_IDX); + } const auto& inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBag::execute(srcData, diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp index 4d9ff3af48b163..6339bff1f56f86 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp @@ -37,16 +37,19 @@ EmbeddingSegmentsSum::EmbeddingSegmentsSum(const std::shared_ptr& op, } _reduction = Reduction::SUM; std::string errPrefix = 
std::string("EmbeddingSegmentsSum layer with name '") + _layerName + "' "; - if (getInputShapeAtPort(INDICES_IDX).getRank() != 1ul) + if (getInputShapeAtPort(INDICES_IDX).getRank() != 1ul) { OPENVINO_THROW(errPrefix, "has indices data with invalid rank: ", getInputShapeAtPort(INDICES_IDX).getRank()); + } - if (getInputShapeAtPort(SEGMENT_ID_IDX).getRank() != 1ul) + if (getInputShapeAtPort(SEGMENT_ID_IDX).getRank() != 1ul) { OPENVINO_THROW(errPrefix, "has invalid segmentID data rank: ", getInputShapeAtPort(SEGMENT_ID_IDX).getRank()); + } } void EmbeddingSegmentsSum::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::string logPrefix = std::string("Layer EmbeddingBag with name '") + _layerName + "' "; static const std::set supportedPrecisions = {ov::element::f32, @@ -55,28 +58,33 @@ void EmbeddingSegmentsSum::initSupportedPrimitiveDescriptors() { ov::element::i32}; auto inDataPrecision = getOriginalInputPrecisionAtPort(EMB_TABLE_IDX); - if (one_of(inDataPrecision, ov::element::bf16, ov::element::f16)) + if (one_of(inDataPrecision, ov::element::bf16, ov::element::f16)) { inDataPrecision = ov::element::f32; + } if (!supportedPrecisions.empty()) { - if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) + if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) { OPENVINO_THROW(logPrefix, "has unsupported precision: ", inDataPrecision.get_type_name()); + } } else { static const std::set defaultSupportedPrecisions = {ov::element::f32, ov::element::i8, ov::element::u8, ov::element::i32}; - if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) + if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) { OPENVINO_THROW(logPrefix, "has unsupported precision: ", inDataPrecision.get_type_name()); + } } std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, 
{LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}}); - if (inputShapes.size() > DEFAULT_INDEX_IDX) + if (inputShapes.size() > DEFAULT_INDEX_IDX) { inDataConfigurators.push_back({LayoutType::ncsp, ov::element::i32}); - if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) + } + if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) { inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); + } addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -102,8 +110,9 @@ void EmbeddingSegmentsSum::getIndices(size_t embIndex, size_t& size, int& weightsIdx, bool& withWeight) { - if (embIndex >= static_cast(lastNumSegments_)) + if (embIndex >= static_cast(lastNumSegments_)) { OPENVINO_THROW("Invalid embedding bag index."); + } indices = nullptr; size = 0; @@ -123,8 +132,9 @@ void EmbeddingSegmentsSum::getIndices(size_t embIndex, if (size == 0) { size = 1lu; withWeight = false; - if (defaultIndices_) + if (defaultIndices_) { indices = defaultIndices_; + } return; } } @@ -156,8 +166,9 @@ bool EmbeddingSegmentsSum::isExecutable() const { void EmbeddingSegmentsSum::execute(const dnnl::stream& strm) { const auto* srcData = getSrcDataAtPortAs(0); const uint8_t* weightsData = nullptr; - if (_withWeights) + if (_withWeights) { weightsData = getSrcDataAtPortAs(PER_SAMPLE_WEIGHTS_IDX); + } const auto& inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBag::execute(srcData, diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp index ff818830ba8c5a..81a7fbc230f1e8 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp @@ -280,8 +280,9 @@ bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs& deconvAttrs, (deconvAttrs.dilation.size() > 1) ? 
deconvAttrs.dilation.at(1) : deconvAttrs.dilation.at(0); unsigned int dilation_y = deconvAttrs.dilation.at(0); if (!one_of(dilation_x, static_cast(0), static_cast(1)) || - !one_of(dilation_y, static_cast(0), static_cast(1))) + !one_of(dilation_y, static_cast(0), static_cast(1))) { return false; + } try { arm_compute::Status status = diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index 7a8e431b606227..1d7025fa0b6833 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -85,8 +85,9 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, auto checkPrecision = [&srcDescs, &dstDescs](std::vector srcVecPrc, ov::element::Type dstPrc) -> bool { for (size_t i = 0; i < srcDescs.size(); i++) { - if (srcDescs[i]->getPrecision() != srcVecPrc[i]) + if (srcDescs[i]->getPrecision() != srcVecPrc[i]) { return false; + } } if (dstDescs[0]->getPrecision() != dstPrc) { return false; @@ -262,8 +263,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEArithmeticAddition::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ConvertPolicy::SATURATE)) + ConvertPolicy::SATURATE)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ConvertPolicy::SATURATE); @@ -276,8 +278,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, &dstTensorsInfo[0], 1.0f, ConvertPolicy::SATURATE, - RoundingPolicy::TO_ZERO)) + RoundingPolicy::TO_ZERO)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], @@ -293,8 +296,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEArithmeticSubtraction::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], 
&dstTensorsInfo[0], - ConvertPolicy::SATURATE)) + ConvertPolicy::SATURATE)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ConvertPolicy::SATURATE); @@ -302,8 +306,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseDivide: - if (!NEElementwiseDivision::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) + if (!NEElementwiseDivision::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); @@ -311,8 +316,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseMaximum: - if (!NEElementwiseMax::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) + if (!NEElementwiseMax::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); @@ -320,8 +326,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseMinimum: - if (!NEElementwiseMin::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) + if (!NEElementwiseMin::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); @@ -329,8 +336,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseSquaredDifference: - if (!NEElementwiseSquaredDiff::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) + if 
(!NEElementwiseSquaredDiff::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); @@ -341,8 +349,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ComparisonOperation::Equal)) + ComparisonOperation::Equal)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::Equal); @@ -353,8 +362,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ComparisonOperation::NotEqual)) + ComparisonOperation::NotEqual)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::NotEqual); @@ -365,8 +375,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ComparisonOperation::Greater)) + ComparisonOperation::Greater)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::Greater); @@ -377,8 +388,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ComparisonOperation::GreaterEqual)) + ComparisonOperation::GreaterEqual)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], 
&dstTensors[0], ComparisonOperation::GreaterEqual); @@ -389,8 +401,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ComparisonOperation::Less)) + ComparisonOperation::Less)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::Less); @@ -401,8 +414,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], - ComparisonOperation::LessEqual)) + ComparisonOperation::LessEqual)) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::LessEqual); @@ -410,8 +424,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseAbs: - if (!NEAbsLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) + if (!NEAbsLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0]); @@ -419,8 +434,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseExp: - if (!NEExpLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) + if (!NEExpLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0]); @@ -428,8 +444,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwisePrelu: - if (!NEPReluLayer::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) + if 
(!NEPReluLayer::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); @@ -451,8 +468,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, getActivationLayerInfo(aclEltwiseAttrs.algorithm, aclEltwiseAttrs.alpha, aclEltwiseAttrs.beta, - aclEltwiseAttrs.gamma))) + aclEltwiseAttrs.gamma))) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], @@ -465,8 +483,9 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs& eltwiseAttrs, }; break; case Algorithm::EltwiseLog: - if (!NELogLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) + if (!NELogLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) { return false; + } exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0]); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp index 9237b231d8fa5b..2f083e3a6aaf7b 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp @@ -132,8 +132,9 @@ MemoryPtr acl_fc_executor::reorderData(const DnnlMemoryDescPtr& srcWeightDesc, const ExecutorContext::CPtr& context) { MemoryPtr input = std::make_shared(context->getEngine(), srcWeightDesc, weightsMem->getData()); MemoryPtr output = std::make_shared(context->getEngine(), dstWeightDesc); - if (!input->getDesc().isDefined() || !output->getDesc().isDefined()) + if (!input->getDesc().isDefined() || !output->getDesc().isDefined()) { OPENVINO_THROW("Can't reorder data with dynamic shapes"); + } if (input->getShape().hasZeroDims() || output->getShape().hasZeroDims()) { return output; diff 
--git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp index 7c4223bfcf63fd..a74eeb3c192fe8 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp @@ -55,8 +55,9 @@ bool AclMVNExecutor::init(const MVNAttrs& mvnAttrs, precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0])); - if (!arm_compute::NEMeanStdDevNormalizationLayer::validate(&srcTensorInfo, &dstTensorInfo, mvnAttrs.epsValue_)) + if (!arm_compute::NEMeanStdDevNormalizationLayer::validate(&srcTensorInfo, &dstTensorInfo, mvnAttrs.epsValue_)) { return false; + } srcTensor.allocator()->init(srcTensorInfo); dstTensor.allocator()->init(dstTensorInfo); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp index b581d696817eb2..fdb08704647851 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp @@ -154,8 +154,9 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, getAclDataLayoutByMemoryDesc(srcDescs[0]), nullptr, nullptr, - &pool_info)) + &pool_info)) { return false; + } exec_func = [this, pool_info]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, &dstTensor, pool_info); @@ -173,8 +174,9 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, getAclDataLayoutByMemoryDesc(srcDescs[0]), &dstDescs[1]->getShape().getStaticDims(), &pool_info, - nullptr)) + nullptr)) { return false; + } auto indDims = dstDescs[1]->getShape().getStaticDims(); TensorInfo indTensorInfo = TensorInfo(shapeCast(indDims), 1, @@ -195,8 +197,9 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, getAclDataLayoutByMemoryDesc(srcDescs[0]), nullptr, &pool_info, - nullptr)) + nullptr)) { return false; + } exec_func 
= [this, pool_info]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, &dstTensor, pool_info); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp index 29b08f54409a38..af53506adfb021 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp @@ -66,8 +66,9 @@ bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) { int axis = axisCast(reduceAttrs.axes[i], srcDims.size(), hasSrcNspcLayout ? NHWC_TO_NCHW : NO_LAYOUT_CONVERSION); - if (hasSrcNspcLayout && axis == -1) + if (hasSrcNspcLayout && axis == -1) { return false; + } castedAxes.push_back(axis); } switch (reduceAttrs.operation) { diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp index 1d30736353b878..427e82f40c546c 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp @@ -99,15 +99,19 @@ inline int axisCast(const std::size_t axis, case NO_LAYOUT_CONVERSION: return revertedAxis; case NHWC_TO_NCHW: - if (shapeSize == 4) + if (shapeSize == 4) { return nhwcToNchw[revertedAxis]; - if (shapeSize == 5) + } + if (shapeSize == 5) { return ndhwcToNcdhw[revertedAxis]; + } case NCHW_TO_NHWC: - if (shapeSize == 4) + if (shapeSize == 4) { return nchwToNhwc[revertedAxis]; - if (shapeSize == 5) + } + if (shapeSize == 5) { return ncdhwToNdhwc[revertedAxis]; + } default: return -1; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/common/common_utils.hpp b/src/plugins/intel_cpu/src/nodes/executors/common/common_utils.hpp index 614caead1a39b1..af393676b08512 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/common/common_utils.hpp +++ 
b/src/plugins/intel_cpu/src/nodes/executors/common/common_utils.hpp @@ -17,22 +17,25 @@ namespace ov { namespace intel_cpu { OV_CPU_MAYBE_UNUSED_FUNCTION static std::vector getDeQuantizedScales(const MemoryArgs& memory) { - if (!memory.count(ARG_DST_DEQ_SCALE)) + if (!memory.count(ARG_DST_DEQ_SCALE)) { return {}; + } auto scalesMemory = memory.at(ARG_DST_DEQ_SCALE); auto scalesData = static_cast(scalesMemory->getData()); - if (!scalesData) + if (!scalesData) { return {}; + } auto dstShape = memory.at(ARG_DST)->getShape(); auto dqScalesShape = scalesMemory->getShape(); auto scalesDims = getNormalizedDimsBySize(dqScalesShape.getDims(), dstShape.getDims().size()); - auto scaleSize = std::accumulate(scalesDims.begin(), scalesDims.end(), std::size_t(1), std::multiplies()); + auto scaleSize = + std::accumulate(scalesDims.begin(), scalesDims.end(), static_cast(1), std::multiplies()); std::vector DQScales(scaleSize, 1.0); @@ -43,8 +46,9 @@ OV_CPU_MAYBE_UNUSED_FUNCTION static std::vector getDeQuantizedScales(cons scaleSize); // @todo do we really need to broadcast dq scales and then resize them back? 
- if (scaleSize > DQScales.size()) + if (scaleSize > DQScales.size()) { DQScales.resize(scaleSize, DQScales[0]); + } if (1 == scaleSize) { std::transform(DQScales.begin(), DQScales.end(), DQScales.begin(), [=](float val) { return (scalesData[0] * val); @@ -56,8 +60,9 @@ OV_CPU_MAYBE_UNUSED_FUNCTION static std::vector getDeQuantizedScales(cons } if (std::all_of(DQScales.begin(), DQScales.end(), [&](float val) { return (val == DQScales[0]); - })) + })) { DQScales.resize(1); + } return DQScales; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/common/ref_transpose.cpp b/src/plugins/intel_cpu/src/nodes/executors/common/ref_transpose.cpp index 7281f3db98f4c0..99d3729e0001cf 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/common/ref_transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/common/ref_transpose.cpp @@ -21,10 +21,11 @@ static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims& static inline void parallel_step(size_t nDims, const VectorDims& dims, VectorDims& indexes) { for (int j = nDims - 1; j >= 0; --j) { ++indexes[j]; - if (indexes[j] < dims[j]) + if (indexes[j] < dims[j]) { break; - else + } else { indexes[j] = 0; + } } } @@ -38,15 +39,17 @@ void RefTransposeExecutor::referenceExecute(const uint8_t* src_data, const size_t data_size = jcp.data_size; const size_t ndims = dst_dims.size(); - if (static_cast(dst_dims[0]) != mb) + if (static_cast(dst_dims[0]) != mb) { dst_dims[0] = mb; + } size_t work_amount = std::accumulate(dst_dims.begin(), dst_dims.end(), 1, std::multiplies()); auto get_idx = [ndims, data_size](const VectorDims& indexes, const VectorDims& strides) { size_t idx = 0; - for (size_t i = 0; i < ndims; ++i) + for (size_t i = 0; i < ndims; ++i) { idx += indexes[i] * strides[i]; + } return idx * data_size; }; diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp index 
4aef57ac484926..939aff37f8275e 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp @@ -135,8 +135,9 @@ static primitive_desc createPrimitiveDesc(const dnnl::engine& engine, for (auto preferredImplType : implPriorities) { const bool found = DnnlExtensionUtils::find_implementation(prim_desc, preferredImplType); - if (found) + if (found) { return std::move(prim_desc); + } } return std::move(first_desc); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp index a4aeac36a4eedb..aed6db7ef60514 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp @@ -57,10 +57,12 @@ class DnnlFCExecutor : public Executor { } void execute(const MemoryArgs& memory) override { - if (resetSrcMemoryDataHandle) + if (resetSrcMemoryDataHandle) { m_primArgs[DNNL_ARG_SRC].set_data_handle(memory.at(ARG_SRC)->getData()); - if (resetDstMemoryDataHandle) + } + if (resetDstMemoryDataHandle) { m_primArgs[DNNL_ARG_DST].set_data_handle(memory.at(ARG_DST)->getData()); + } m_primitive->execute(m_primArgs); } @@ -121,8 +123,9 @@ class DnnlFCExecutor : public Executor { const PrimitivePtr newPrimitive, const MemoryPtr& memory) { const auto newPrimMemDesc = newPrimitive->weightsDesc(); - if (currentPrimitive && currentPrimitive->weightsDesc()->isCompatible(*newPrimMemDesc)) + if (currentPrimitive && currentPrimitive->weightsDesc()->isCompatible(*newPrimMemDesc)) { return; + } originalMemDesc = Primitive::makeTransposedWeightDescriptor(originalMemDesc, newPrimMemDesc, m_attrs.weightsNonTransposed); @@ -138,8 +141,9 @@ class DnnlFCExecutor : public Executor { void updateScratchPadMem(const PrimitivePtr currentPrimitive, const PrimitivePtr newPrimitive) { const auto newPrimMemDesc = 
newPrimitive->scratchPadDesc(); // @todo should we compare dnnl::memory::desc directly to avoid any overhead? - if (currentPrimitive && currentPrimitive->scratchPadDesc()->isCompatible(*newPrimMemDesc)) + if (currentPrimitive && currentPrimitive->scratchPadDesc()->isCompatible(*newPrimMemDesc)) { return; + } m_scratchPadMemory = m_context->getScratchPad()->createScratchPadMem(newPrimMemDesc); m_primArgs[DNNL_ARG_SCRATCHPAD] = m_scratchPadMemory->getPrimitive(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp index 8c1894f43552f1..6fa0cffa95017f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp @@ -112,8 +112,9 @@ std::shared_ptr DnnlFCPrimitive::create(const MemoryArgs& memor DnnlMemoryDescPtr DnnlFCPrimitive::makeTransposedWeightDescriptor(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc, bool weightsNonTransposed) { - if (!weightsNonTransposed) + if (!weightsNonTransposed) { return srcDesc; + } const auto& weiDesc = srcDesc->getDnnlDesc(); auto wDims = weiDesc.get_dims(); @@ -128,15 +129,17 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT const ov::element::Type weightsType, const ov::intel_cpu::Config::ModelType modelType) { if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) { - if (one_of(inputType, f32, bf16) && one_of(weightsType, u8, i8, nf4, u4, i4, f4e2m1)) + if (one_of(inputType, f32, bf16) && one_of(weightsType, u8, i8, nf4, u4, i4, f4e2m1)) { return true; + } if (modelType == ov::intel_cpu::Config::ModelType::LLM) { // f16c kernel saves memory footprint with additional decompression computational overhead // which is only meaningful on LLM with small batch-size. 
// TODO: fall-back to use f32 weights on large batch-size - if (inputType == f32 && one_of(weightsType, f16, bf16)) + if (inputType == f32 && one_of(weightsType, f16, bf16)) { return true; + } } } return false; @@ -147,15 +150,18 @@ static bool useDynamicQuantizationImpl(size_t dqGroupSize, const MemoryDescPtr& weightsDesc, const MemoryArgs& memory, bool needTranspose) { - if (dqGroupSize == 0) + if (dqGroupSize == 0) { return false; + } if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni) && - !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) + !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) { return false; + } - if (srcDesc->getPrecision() != ov::element::f32) + if (srcDesc->getPrecision() != ov::element::f32) { return false; + } MemoryCPtr zpPtr = memory.count(ARG_WEI | ARG_ATTR_ZERO_POINTS) ? memory.at(ARG_WEI | ARG_ATTR_ZERO_POINTS) : nullptr; @@ -163,20 +169,24 @@ static bool useDynamicQuantizationImpl(size_t dqGroupSize, // To support dynamic quantization with weights symmetrically quantized as i8/i4 // w/o zero-point, we will transform weight to u8/u4 weight with zp 128/8. 
if (!one_of(weightsDesc->getPrecision(), ov::element::u8, ov::element::u4) && - !((one_of(weightsDesc->getPrecision(), ov::element::i8, ov::element::i4) && !zpPtr))) + !((one_of(weightsDesc->getPrecision(), ov::element::i8, ov::element::i4) && !zpPtr))) { return false; + } - if (zpPtr && !one_of(zpPtr->getDesc().getPrecision(), ov::element::u8, ov::element::u4, ov::element::undefined)) + if (zpPtr && !one_of(zpPtr->getDesc().getPrecision(), ov::element::u8, ov::element::u4, ov::element::undefined)) { return false; + } // TODO: heuristic: disable avx2 asymmetric bool is_asymmetric_weights = one_of(weightsDesc->getPrecision(), ov::element::u8, ov::element::u4); - if (is_asymmetric_weights && !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) + if (is_asymmetric_weights && !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni)) { return false; + } const size_t simdWidth = 16; - if (dqGroupSize % simdWidth) + if (dqGroupSize % simdWidth) { return false; + } if (weightsDesc->getPrecision() == ov::element::u4) { int ic = weightsDesc->getShape().getStaticDims()[1]; @@ -197,8 +207,9 @@ static bool useDynamicQuantizationImpl(size_t dqGroupSize, } const size_t minLoopSize = 8; - if (minGroupSize != INT_MAX && minGroupSize % minLoopSize) + if (minGroupSize != INT_MAX && minGroupSize % minLoopSize) { return false; + } } return true; @@ -225,8 +236,9 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, if (memory.count(ARG_WEI | ARG_ATTR_SCALES)) { auto dstPrc = memory.at(ARG_WEI | ARG_ATTR_SCALES)->getPrecision(); - if (dstPrc != f8e8m0 || useDynamicQuantization) + if (dstPrc != f8e8m0 || useDynamicQuantization) { dstPrc = ov::element::f32; + } dnnlpoc.appendDecompressionScalesLegacy(memory.at(ARG_WEI | ARG_ATTR_SCALES), !attrs.weightsNonTransposed, @@ -262,8 +274,9 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, static dnnl::memory::desc normalizeDescriptor(const dnnl::memory::desc& desc) { const 
auto& dims = desc.get_dims(); - if (dims.size() > 2) + if (dims.size() > 2) { return desc.reshape(reshapeDownToRank<2>(dims)); + } return desc; } @@ -290,10 +303,12 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons uint64_t dynQuantGroupSize = 0; attr.get_src_dyn_quant_params(dynQuantGroupSize); if (dynQuantGroupSize > 0) { - if (wdt == dnnl::memory::data_type::s8) + if (wdt == dnnl::memory::data_type::s8) { wdt = memory::data_type::u8; - if (wdt == dnnl::memory::data_type::s4) + } + if (wdt == dnnl::memory::data_type::s4) { wdt = memory::data_type::u4; + } } } else if (indt == dnnl::memory::data_type::u8 || indt == dnnl::memory::data_type::s8) { wdt = memory::data_type::s8; @@ -336,8 +351,9 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc, return contains(implPriorities, implType); }); - if (found) + if (found) { return std::move(prim_desc); + } return std::move(first_desc); } @@ -395,8 +411,9 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs& const auto postOpData = createPrimitiveAttrs(attrs, postOps, memory, context, useDynamicQuantization); - if (!cacheWeights) + if (!cacheWeights) { return std::make_shared(postOpData); + } if (srcDesc->getShape().isDynamic()) { const auto& inShape = srcDesc->getShape(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp index 1b51487fb4cebf..d4fe05c66c281a 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp @@ -79,8 +79,9 @@ bool DnnlMatMulPrimitive::Key::operator==(const Key& rhs) const { template static dimsType normalizeToRank(const dimsType& vec, size_t rank) { - if (vec.size() == rank || vec.empty()) + if (vec.size() == rank || vec.empty()) { return vec; + } dimsType result; result.reserve(rank); @@ 
-238,8 +239,9 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc, return contains(implPriorities, implType); }); - if (found) + if (found) { return std::move(prim_desc); + } return std::move(first_desc); } @@ -278,8 +280,9 @@ static VectorDims makeDummyOutputDims(const VectorDims& inShape, const VectorDim bool DnnlMatMulPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputType, const ov::element::Type weightsType) { #if defined(OPENVINO_ARCH_X86_64) - if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) + if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) { return false; + } #endif return (one_of(inputType, f32, bf16, f16) && one_of(weightsType, u8, i8, u4, i4)); @@ -301,8 +304,9 @@ DnnlShapeAgnosticDataPtr DnnlMatMulPrimitive::createShapeAgnosticData(const FCAt const auto postOpData = createPrimitiveAttrs(mmAttrs, postOps, memory, context, useWeightsDecompression, attrs.weightsNonTransposed); - if (!cacheWeights) + if (!cacheWeights) { return std::make_shared(postOpData); + } if (srcDesc->getShape().isDynamic()) { const auto& inShape = srcDesc->getShape(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_implementation.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_implementation.hpp index 375016038f2b68..e4c8ab31e41921 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/executor_implementation.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor_implementation.hpp @@ -74,8 +74,9 @@ class ExecutorImplementation { const ExecutorContext::CPtr context) const { DEBUG_LOG("Creating executor using implementation: ", m_name); - if (m_create) + if (m_create) { return m_create(attrs, postOps, memory, context); + } return nullptr; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index 792aacf54a118a..86fd065e50180f 100644 --- 
a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -149,14 +149,17 @@ static bool fullyMatchConfiguration(const MemoryDescArgs& currentDescriptors, const auto& type = typeConfig[i]; const auto& desc = currentDescriptors.at(notation[i]); - if (desc->empty()) + if (desc->empty()) { continue; + } - if (desc->getPrecision() != type) + if (desc->getPrecision() != type) { return false; // type mismatch + } - if (!desc->hasLayoutType(layoutConfig[i])) + if (!desc->hasLayoutType(layoutConfig[i])) { return false; // layout mismatch + } } return true; @@ -175,8 +178,9 @@ static MemoryDescArgs createOptimalDescriptors(const MemoryDescArgs& currentDesc const auto& type = typeConfig[i]; const auto& layout = layoutConfig[i]; - if (desc->empty()) + if (desc->empty()) { continue; + } if (descType == type && desc->hasLayoutType(layout)) { continue; diff --git a/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp index 189ae24bac808d..609868e326f3ec 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/interpolate.cpp @@ -109,10 +109,11 @@ float ov::intel_cpu::InterpolateExecutor::coordTransToInput(int outCoord, break; } case InterpolateCoordTransMode::pytorch_half_pixel: { - if (outShape > 1) + if (outShape > 1) { return (outCoord + 0.5f) / scale - 0.5f; - else + } else { return 0; + } break; } case InterpolateCoordTransMode::asymmetric: { @@ -124,10 +125,11 @@ float ov::intel_cpu::InterpolateExecutor::coordTransToInput(int outCoord, break; } case InterpolateCoordTransMode::align_corners: { - if (outShape > 1) + if (outShape > 1) { return outCoord * (static_cast(inShape - 1) / static_cast(outShape - 1)); - else + } else { return 0; + } break; } default: { @@ -142,10 +144,11 @@ int ov::intel_cpu::InterpolateExecutor::nearestRound(float 
originCoord, InterpolateNearestMode nearestMode) const { switch (nearestMode) { case InterpolateNearestMode::round_prefer_floor: { - if (originCoord == (static_cast(originCoord) + 0.5f)) + if (originCoord == (static_cast(originCoord) + 0.5f)) { return static_cast(std::floor(originCoord)); - else + } else { return static_cast(std::round(originCoord)); + } break; } case InterpolateNearestMode::round_prefer_ceil: { @@ -161,10 +164,11 @@ int ov::intel_cpu::InterpolateExecutor::nearestRound(float originCoord, break; } case InterpolateNearestMode::simple: { - if (isDownsample) + if (isDownsample) { return static_cast(std::ceil(originCoord)); - else + } else { return static_cast(originCoord); + } } default: { OPENVINO_THROW("Interpolate executor does not support specified nearest round mode"); diff --git a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp index 03daeba24c65d8..b450d0e646461a 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp @@ -142,8 +142,9 @@ void MlasGemmExecutor::execute(const MemoryArgs& memory) { } void MlasGemmExecutor::moveMemToNumaNode(int numaNodeID) { - if (curNumaNode == numaNodeID) + if (curNumaNode == numaNodeID) { return; + } curNumaNode = numaNodeID; mbind_move(packedWeights, numaNodeID); if (m_attrs.withBias) { diff --git a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_transpose.cpp b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_transpose.cpp index 2b8b71bfbced0b..6fa8783c425448 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_transpose.cpp @@ -189,8 +189,8 @@ void MlasTransposeExecutor::TransposeSingleAxisOutwards(const MemoryCPtr& input, auto num_loops = calcShapeSize(input_shape, 0, to); auto num_writers = input_dims[from]; auto block_size = calcShapeSize(input_shape, from + 
1, input_shape.getRank()); - auto writes_per_loop = int64_t(input_shape.getElementsCount() / num_loops / block_size); - auto writes_per_writer_per_loop = int64_t(writes_per_loop / num_writers); + auto writes_per_loop = static_cast(input_shape.getElementsCount() / num_loops / block_size); + auto writes_per_writer_per_loop = static_cast(writes_per_loop / num_writers); // TODO: check integer overflow const size_t bytes_per_write = static_cast(block_size) * element_size; @@ -267,8 +267,8 @@ void MlasTransposeExecutor::TransposeSingleAxisInwards(const MemoryCPtr& input, auto num_loops = calcShapeSize(input_shape, 0, from); auto num_readers = input_dims[from]; auto block_size = calcShapeSize(input_shape, to + 1, input_shape.getRank()); - auto reads_per_loop = int64_t(input_shape.getElementsCount() / num_loops / block_size); - auto reads_per_reader_per_loop = int64_t(reads_per_loop / num_readers); + auto reads_per_loop = static_cast(input_shape.getElementsCount() / num_loops / block_size); + auto reads_per_reader_per_loop = static_cast(reads_per_loop / num_readers); // TODO: check integer overflow const size_t bytes_per_read = static_cast(block_size) * element_size; diff --git a/src/plugins/intel_cpu/src/nodes/executors/precision_translation.cpp b/src/plugins/intel_cpu/src/nodes/executors/precision_translation.cpp index 36aab4f8fddc77..31f8ed874ec3cb 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/precision_translation.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/precision_translation.cpp @@ -23,12 +23,14 @@ InOutTypes getTypeConfiguration(const MemoryDescArgs& descriptors, }); for (const auto& entry : mapping) { - if (!entry.enabled()) + if (!entry.enabled()) { continue; + } const auto& pattern = entry.mask(); - if (!match(pattern, types)) + if (!match(pattern, types)) { continue; + } return entry.translate(types); } diff --git a/src/plugins/intel_cpu/src/nodes/executors/precision_translation.hpp 
b/src/plugins/intel_cpu/src/nodes/executors/precision_translation.hpp index 20e613eea2c236..dc39449582fc33 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/precision_translation.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/precision_translation.hpp @@ -93,14 +93,16 @@ class TypeMappingEntry { } InOutTypes translate(const InOutTypes& types) const { - if (m_translation) + if (m_translation) { return m_translation(types); + } return {}; } bool enabled() const { - if (m_enabled) + if (m_enabled) { return m_enabled(); + } return true; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/shl/shl.hpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl.hpp index 862cd94800025a..9b1317d9868b80 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl.hpp @@ -24,11 +24,11 @@ struct ShlStructure { public: ShlStructure() = default; ShlStructure(const ShlStructure&) = default; - ShlStructure(ShlStructure&&) = default; + ShlStructure(ShlStructure&&) noexcept = default; explicit ShlStructure(T t) { reset(t); } ShlStructure &operator=(const ShlStructure&) = default; - ShlStructure &operator=(ShlStructure&&) = default; + ShlStructure& operator=(ShlStructure&&) noexcept = default; void reset(T t) { m_ptr.reset(t, traits::destructor); @@ -121,8 +121,9 @@ struct ShlTensor : public ShlStructure { VectorDims getShape() const { VectorDims shape(get()->dim_count); - for (size_t i = 0; i < shape.size(); ++i) + for (size_t i = 0; i < shape.size(); ++i) { shape[i] = static_cast(get()->dim[i]); + } return shape; } @@ -161,8 +162,9 @@ struct ShlTensor : public ShlStructure { void setShape(const VectorDims& shape) { get()->dim_count = shape.size(); OPENVINO_ASSERT(get()->dim_count < MAX_DIM, "Shl supports shapes with rank less or equal to 8"); - for (int i = 0; i < get()->dim_count; ++i) + for (int i = 0; i < get()->dim_count; ++i) { get()->dim[i] = static_cast(shape[i]); + } }; }; diff --git 
a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp index 54f00ba20538b3..36a2cb6c51dd83 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp @@ -52,8 +52,9 @@ bool ShlEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, const auto unifiedRank = srcDescs.front()->as()->getBlockDims().size(); auto has_unified_layout = [unifiedLayout, unifiedRank](const MemoryDescPtr& desc) { if (desc->hasLayoutType(LayoutType::nspc)) { // ensure the same rank - if (desc->as()->getBlockDims().size() != unifiedRank) + if (desc->as()->getBlockDims().size() != unifiedRank) { return false; + } } return desc->hasLayoutType(unifiedLayout); }; diff --git a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp index ceb50d89830836..c762ef1f970d39 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp @@ -123,7 +123,8 @@ bool ShlFCExecutor::update(const MemoryArgs& memory) { const auto src_shape = src.getShape(); const auto dst_shape = dst.getShape(); - dim_M = std::accumulate(dst_shape.rbegin() + 1, dst_shape.rend(), size_t(1), std::multiplies()); + dim_M = + std::accumulate(dst_shape.rbegin() + 1, dst_shape.rend(), static_cast(1), std::multiplies()); dim_In = src_shape.back(); dim_Out = dst_shape.back(); LDA = dim_In * memory.at(ARG_SRC)->getPrecision().size(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/executors/subgraph.cpp index 14b929633db8c6..4d2d497c5c8684 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/subgraph.cpp @@ -13,18 +13,24 @@ namespace ov { namespace intel_cpu { bool operator==(const 
SubgraphAttrs& lhs, const SubgraphAttrs& rhs) { - if (&lhs == &rhs) + if (&lhs == &rhs) { return true; - if (lhs.bodyHash != rhs.bodyHash) + } + if (lhs.bodyHash != rhs.bodyHash) { return false; - if (lhs.inMemOrders.size() != rhs.inMemOrders.size() || lhs.inMemPrecs.size() != rhs.inMemPrecs.size()) + } + if (lhs.inMemOrders.size() != rhs.inMemOrders.size() || lhs.inMemPrecs.size() != rhs.inMemPrecs.size()) { return false; - if (lhs.outMemOrders.size() != rhs.outMemOrders.size() || lhs.outMemPrecs.size() != rhs.outMemPrecs.size()) + } + if (lhs.outMemOrders.size() != rhs.outMemOrders.size() || lhs.outMemPrecs.size() != rhs.outMemPrecs.size()) { return false; - if (lhs.inMemOrders != rhs.inMemOrders || lhs.inMemPrecs != rhs.inMemPrecs) + } + if (lhs.inMemOrders != rhs.inMemOrders || lhs.inMemPrecs != rhs.inMemPrecs) { return false; - if (lhs.outMemOrders != rhs.outMemOrders || lhs.outMemPrecs != rhs.outMemPrecs) + } + if (lhs.outMemOrders != rhs.outMemOrders || lhs.outMemPrecs != rhs.outMemPrecs) { return false; + } return true; } @@ -32,15 +38,19 @@ size_t get_attr_hash(size_t seed, const std::shared_ptr& attrs) { using namespace dnnl::impl; using namespace dnnl::impl::primitive_hashing; - for (const auto& order : attrs->inMemOrders) + for (const auto& order : attrs->inMemOrders) { seed = get_vector_hash(seed, order); - for (const auto& prec : attrs->inMemPrecs) + } + for (const auto& prec : attrs->inMemPrecs) { seed = hash_combine(seed, prec.hash()); + } - for (const auto& order : attrs->outMemOrders) + for (const auto& order : attrs->outMemOrders) { seed = get_vector_hash(seed, order); - for (const auto& prec : attrs->outMemPrecs) + } + for (const auto& prec : attrs->outMemPrecs) { seed = hash_combine(seed, prec.hash()); + } seed = hash_combine(seed, attrs->bodyHash); return seed; @@ -75,7 +85,7 @@ SubgraphBaseExecutor::SubgraphBaseExecutor(const std::shared_ptrtensor_rank; m_harness_work_amount = std::accumulate(m_parallel_exec_domain.cbegin(), 
m_parallel_exec_domain.cend(), - size_t(1), + static_cast(1), std::multiplies()); m_nthreads = std::min(parallel_get_max_threads(), static_cast(m_harness_work_amount)); diff --git a/src/plugins/intel_cpu/src/nodes/executors/subgraph.hpp b/src/plugins/intel_cpu/src/nodes/executors/subgraph.hpp index f731a507e61c2e..d26946bd36d772 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/subgraph.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/subgraph.hpp @@ -73,8 +73,9 @@ class SubgraphBaseExecutor { virtual void parallel_forNd(const initializer_functor& initializer, const call_functor& caller); inline void update_scratchpad_ptr(void*& scratchpad_ptr, size_t ithr) const { - if (m_buffer_scratchpad_size > 0) + if (m_buffer_scratchpad_size > 0) { scratchpad_ptr = m_buffer_scratchpad->getDataAs() + ithr * m_buffer_scratchpad_size; + } } std::shared_ptr m_schedule; @@ -113,11 +114,13 @@ class SubgraphStaticBaseExecutor { const std::vector& start_offset_in, const std::vector& start_offset_out, size_t ithr) { - for (size_t i = 0; i < srcMemPtrs.size(); i++) + for (size_t i = 0; i < srcMemPtrs.size(); i++) { call_args.src_ptrs[i] = srcMemPtrs[i]->getDataAs() + start_offset_in[i]; + } - for (size_t i = 0; i < dstMemPtrs.size(); i++) + for (size_t i = 0; i < dstMemPtrs.size(); i++) { call_args.dst_ptrs[i] = dstMemPtrs[i]->getDataAs() + start_offset_out[i]; + } } }; @@ -152,10 +155,12 @@ class SubgraphDynamicSpecializedBaseExecutor { src_ptrs.resize(in_num, nullptr); dst_ptrs.resize(out_num, nullptr); - for (size_t i = 0; i < in_num; i++) + for (size_t i = 0; i < in_num; i++) { src_ptrs[i] = srcMemPtrs[i]->getDataAs() + start_offset_in[i]; - for (size_t i = 0; i < out_num; i++) + } + for (size_t i = 0; i < out_num; i++) { dst_ptrs[i] = dstMemPtrs[i]->getDataAs() + start_offset_out[i]; + } } inline void update_ptrs(jit_snippets_call_args& call_args, diff --git a/src/plugins/intel_cpu/src/nodes/executors/transpose.cpp b/src/plugins/intel_cpu/src/nodes/executors/transpose.cpp 
index ddf4cf20034d92..babeb70cc5abf3 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/transpose.cpp @@ -19,10 +19,12 @@ jit_permute_config_params TransposeExecutor::prepareParams(const PermuteParams& VectorDims src_block_order = params.src_block_order; VectorDims src_block_strides(params.src_block_dims.size(), 1); VectorDims dst_block_strides(params.dst_block_dims.size(), 1); - for (int i = params.src_block_dims.size() - 2; i >= 0; i--) + for (int i = params.src_block_dims.size() - 2; i >= 0; i--) { src_block_strides[i] = src_block_strides[i + 1] * params.src_block_dims[i + 1]; - for (int i = params.dst_block_dims.size() - 2; i >= 0; i--) + } + for (int i = params.dst_block_dims.size() - 2; i >= 0; i--) { dst_block_strides[i] = dst_block_strides[i + 1] * params.dst_block_dims[i + 1]; + } VectorDims new_dst_block_strides = dst_block_strides; VectorDims new_dst_block_order = params.dst_block_order; diff --git a/src/plugins/intel_cpu/src/nodes/executors/x64/jit_transpose.cpp b/src/plugins/intel_cpu/src/nodes/executors/x64/jit_transpose.cpp index f6597a8b22cdae..32e6734c718b87 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/x64/jit_transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/x64/jit_transpose.cpp @@ -12,8 +12,9 @@ using namespace dnnl::impl::cpu::x64; namespace ov { namespace intel_cpu { void JitTransposeExecutor::exec(const std::vector& src, const std::vector& dst) { - if (!pKernel) + if (!pKernel) { OPENVINO_THROW("Could not execute. 
Kernel for Transpose node was not compiled."); + } const uint8_t* srcData = src[0]->getDataAs(); uint8_t* dstData = dst[0]->getDataAs(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.cpp index 0bb6bfaf6d1e26..1b2bcf217f7f64 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.cpp @@ -82,7 +82,7 @@ SubgraphExecutor::SubgraphExecutor(const std::shared_ptr& snip auto external_buffer_size = std::accumulate(m_repacked_inputs.begin(), m_repacked_inputs.end(), - size_t(0), + static_cast(0), [](size_t sum, const std::pair& p) { auto curr_mem_size = p.second.desc()->getCurrentMemSize(); OPENVINO_ASSERT(curr_mem_size != ov::intel_cpu::MemoryDesc::UNDEFINED_SIZE, @@ -98,8 +98,9 @@ SubgraphExecutor::SubgraphExecutor(const std::shared_ptr& snip // To avoid extra overheads in runtime on vector creation, // we initialize `repacked_offsets_by_threads` by default here m_repacked_offsets_by_threads.resize(m_nthreads); - for (size_t i = 0; i < m_repacked_offsets_by_threads.size(); ++i) + for (size_t i = 0; i < m_repacked_offsets_by_threads.size(); ++i) { clean_repacked_offsets(i); + } if (m_tensor_rank == rank6D) { init_offset = [](const std::vector& offsets, const std::vector& indexes, size_t& offset) { @@ -108,8 +109,9 @@ SubgraphExecutor::SubgraphExecutor(const std::shared_ptr& snip }; } else { init_offset = [](const std::vector& offsets, const std::vector& indexes, size_t& offset) { - for (size_t j = 0; j < indexes.size(); j++) + for (size_t j = 0; j < indexes.size(); j++) { offset += offsets[j] * indexes[j]; + } }; } } @@ -169,10 +171,11 @@ std::vector SubgraphExecutor::separately_repack_inputs(const dnnl::st "Unsupported shape rank of repacking data"); const auto& kernel = repacked_input.kernel(); - if (m_tensor_rank == rank6D) + if (m_tensor_rank == rank6D) { parallel4d_repacking(kernel.get(), dom, in_strides, 
out_strides, src, dst); - else + } else { parallelNd_repacking(kernel.get(), dom, in_strides, out_strides, src, dst); + } reordered_in_ptrs[in_idx] = dst_mem; offset += desc->getCurrentMemSize(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.hpp b/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.hpp index fe41f64224fc83..46819bdb46a76f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/x64/subgraph.hpp @@ -31,8 +31,9 @@ class SubgraphExecutor : public SubgraphBaseExecutor { jit_snippets_call_args& call_args); inline void* get_external_scratchpad_ptr(size_t ithr, size_t idx) const { - if (m_repacked_inputs.empty()) + if (m_repacked_inputs.empty()) { return nullptr; + } uint8_t* data_ptr = m_buffer_scratchpad->getDataAs() + m_internal_buffer_size; for (const auto& p : m_repacked_inputs) { diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp index 7e46da40ddeded..277175df2dac80 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp @@ -125,10 +125,12 @@ struct ConfidenceComparator { explicit ConfidenceComparator(const float* conf_data) : _conf_data(conf_data) {} bool operator()(int idx1, int idx2) { - if (_conf_data[idx1] > _conf_data[idx2]) + if (_conf_data[idx1] > _conf_data[idx2]) { return true; - if (_conf_data[idx1] < _conf_data[idx2]) + } + if (_conf_data[idx1] < _conf_data[idx2]) { return false; + } return idx1 < idx2; } @@ -266,13 +268,15 @@ ExperimentalDetectronDetectionOutput::ExperimentalDetectronDetectionOutput(const } void ExperimentalDetectronDetectionOutput::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; 
inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, ov::element::f32}, diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp index 7f613b39ac3e9f..a648b80250fdff 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp @@ -153,12 +153,14 @@ void nms_cpu(const int num_boxes, #endif for (int box = 0; box < num_boxes; ++box) { - if (is_dead[box]) + if (is_dead[box]) { continue; + } index_out[count++] = base_index + box; - if (count == max_num_out) + if (count == max_num_out) { break; + } int tail = box + 1; @@ -245,8 +247,9 @@ void nms_cpu(const int num_boxes, res = area / (A_area + B_area - area); } - if (nms_thresh < res) + if (nms_thresh < res) { is_dead[tail] = 1; + } } } @@ -325,8 +328,9 @@ ExperimentalDetectronGenerateProposalsSingleImage::ExperimentalDetectronGenerate } void ExperimentalDetectronGenerateProposalsSingleImage::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::f32}, @@ -353,16 +357,18 @@ void ExperimentalDetectronGenerateProposalsSingleImage::execute(const dnnl::stre for (size_t i = 0; i < deltaDims.size(); i++) { deltas_dims_size *= deltaDims[i]; } - if (anchor_dims_size != deltas_dims_size) + if (anchor_dims_size != deltas_dims_size) { OPENVINO_THROW("'Anchors' blob size for ONNXProposal is incompatible with 'deltas' blob size!"); + } size_t score_dims_size = 1; const auto& 
scoreDims = getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims(); for (size_t i = 0; i < scoreDims.size(); i++) { score_dims_size *= scoreDims[i]; } - if (deltas_dims_size != (4 * score_dims_size)) + if (deltas_dims_size != (4 * score_dims_size)) { OPENVINO_THROW("'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"); + } // Prepare memory const float* p_deltas_item = getSrcDataAtPortAs(INPUT_DELTAS); diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp index 298f369930238f..63260b9310b456 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp @@ -36,8 +36,9 @@ ExperimentalDetectronPriorGridGenerator::ExperimentalDetectronPriorGridGenerator } const auto priorGridGen = ov::as_type_ptr(op); - if (getOriginalInputsNumber() != 3 || getOriginalOutputsNumber() != 1) + if (getOriginalInputsNumber() != 3 || getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } const auto& attr = priorGridGen->get_attrs(); grid_w_ = attr.w; @@ -47,8 +48,9 @@ ExperimentalDetectronPriorGridGenerator::ExperimentalDetectronPriorGridGenerator } void ExperimentalDetectronPriorGridGenerator::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::f32}, diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp index a22ecad308c52f..c92cba0f00a34c 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp @@ -304,13 +304,15 @@ ExperimentalDetectronROIFeatureExtractor::ExperimentalDetectronROIFeatureExtract } void ExperimentalDetectronROIFeatureExtractor::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::f32}}, diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp index 0fdad9f3c050fc..b9ae3ba31ca798 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp @@ -39,23 +39,27 @@ ExperimentalDetectronTopKROIs::ExperimentalDetectronTopKROIs(const std::shared_p } const auto topKROI = ov::as_type_ptr(op); - if (topKROI == nullptr) + if (topKROI == nullptr) { OPENVINO_THROW("Operation with name '", op->get_friendly_name(), "' is not an instance of ExperimentalDetectronTopKROIs from opset6."); + } - if (inputShapes.size() != 2 || outputShapes.size() != 1) + if (inputShapes.size() != 2 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } - if (getInputShapeAtPort(INPUT_ROIS).getRank() != 2 || getInputShapeAtPort(INPUT_PROBS).getRank() != 1) + if (getInputShapeAtPort(INPUT_ROIS).getRank() != 2 || getInputShapeAtPort(INPUT_PROBS).getRank() != 1) { THROW_CPU_NODE_ERR("has unsupported input shape"); + } max_rois_num_ = topKROI->get_max_rois(); } void ExperimentalDetectronTopKROIs::initSupportedPrimitiveDescriptors() { - if 
(!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::f32}}, {{LayoutType::ncsp, ov::element::f32}}, diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index 7fbe1c3449bfd0..0038fc467de010 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -69,8 +69,9 @@ struct jit_extract_image_patches_kernel : public jit_uni_extract_image_patches_k this->postamble(); - if (mayiuse_gather) + if (mayiuse_gather) { prepare_table(); + } } private: @@ -291,8 +292,9 @@ struct jit_extract_image_patches_kernel : public jit_uni_extract_image_patches_k void prepare_table() { align(64); L(gather_index_table); - for (size_t i = 0; i < vlen / sizeof(int32_t); i++) + for (size_t i = 0; i < vlen / sizeof(int32_t); i++) { dd(i * jpp.SW * jpp.dtype_size); + } } }; #endif // OPENVINO_ARCH_X86_64 @@ -366,18 +368,21 @@ ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, co auto extImgPatcher = ov::as_type_ptr(op); - if (inputShapes.size() != 1 || outputShapes.size() != 1) + if (inputShapes.size() != 1 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input or output edges!", " Input: ", inputShapes.size(), "); Output: ", outputShapes.size()); + } - if (getInputShapeAtPort(0).getRank() != 4) + if (getInputShapeAtPort(0).getRank() != 4) { THROW_CPU_NODE_ERR("must have 4D input tensor. Actual: ", getInputShapeAtPort(0).getRank()); + } - if (getOutputShapeAtPort(0).getRank() != 4) + if (getOutputShapeAtPort(0).getRank() != 4) { THROW_CPU_NODE_ERR("must have 4D output tensor. 
Actual: ", getOutputShapeAtPort(0).getRank()); + } if (extImgPatcher->get_auto_pad() == ov::op::PadType::VALID) { _auto_pad = ExtImgPatcherPadType::VALID; @@ -393,19 +398,23 @@ ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, co ; _strides = extImgPatcher->get_strides(); _rates = extImgPatcher->get_rates(); - if (_ksizes.size() != 2 || _strides.size() != 2 || _rates.size() != 2) + if (_ksizes.size() != 2 || _strides.size() != 2 || _rates.size() != 2) { THROW_CPU_NODE_ERR("must have the following attributes with shape {2}: sizes, strides, rates."); + } } void ExtractImagePatches::prepareParams() { const auto& srcMemPtr0 = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr0 || !srcMemPtr0->isDefined()) + if (!srcMemPtr0 || !srcMemPtr0->isDefined()) { OPENVINO_THROW("Input memory is undefined."); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory is undefined."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } const auto& in_dims = getParentEdgeAt(0)->getMemory().getStaticDims(); const auto& out_dims = getChildEdgeAt(0)->getMemory().getStaticDims(); @@ -437,12 +446,14 @@ void ExtractImagePatches::prepareParams() { } void ExtractImagePatches::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto precision = getOriginalInputPrecisionAtPort(0); - if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) + if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) { THROW_CPU_NODE_ERR("has unsupported precision: ", precision.get_type_name()); + } addSupportedPrimDesc({{LayoutType::ncsp, precision}}, {{LayoutType::ncsp, precision}}, 
impl_desc_type::ref_any); } @@ -650,8 +661,9 @@ ExtractImagePatches::ExtractImagePatchesJitExecutor::ExtractImagePatchesJitExecu OPENVINO_THROW("Can't create jit extract image patches kernel"); } - if (pKernel) + if (pKernel) { pKernel->create_ker(); + } #endif // OPENVINO_ARCH_X86_64 } @@ -659,8 +671,9 @@ void ExtractImagePatches::ExtractImagePatchesJitExecutor::exec(void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) { - if (!pKernel) + if (!pKernel) { OPENVINO_THROW("Can't execute, kernel for extract image patches node is not compiled"); + } executeOptimizedGeneric(src, dst, istrides, ostrides); } diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp index 411a77260aa7d6..dfd0c06d2fec1e 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.cpp +++ b/src/plugins/intel_cpu/src/nodes/eye.cpp @@ -43,10 +43,12 @@ Eye::Eye(const std::shared_ptr& op, const GraphContext::CPtr& context) } void Eye::getSupportedDescriptors() { - if (!one_of(getParentEdges().size(), 3u, 4u)) + if (!one_of(getParentEdges().size(), 3u, 4u)) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } } template @@ -70,14 +72,16 @@ void Eye::execute(const dnnl::stream& strm) { } void Eye::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; std::vector outDataConf; inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::i32); + } outDataConf.reserve(1); outDataConf.emplace_back(LayoutType::ncsp, outType); @@ -90,8 +94,9 @@ void Eye::executeSpecified() { const size_t colNum = getColNum(); const 
int64_t shift = getDiagIndex(); auto outPtr = getDstMemoryAtPort(0); - if (!outPtr || !outPtr->isDefined()) + if (!outPtr || !outPtr->isDefined()) { THROW_CPU_NODE_ERR("Destination memory is undefined."); + } T* dst = outPtr->getDataAs(); const size_t batchVolume = getBatchVolume(getBatchShape()); @@ -100,10 +105,11 @@ void Eye::executeSpecified() { const size_t l2CacheSize = dnnl::utils::get_cache_size(2, true); const size_t elementsCount = colNum * rowNum * batchVolume; - const int64_t countByColumns = std::max(int64_t(colNum) - std::abs(shift), int64_t(0)); - const int64_t countByRows = std::max(int64_t(rowNum) - std::abs(shift), int64_t(0)); - const size_t onesPerBatchNum = static_cast(shift > 0 ? std::min(countByColumns, int64_t(rowNum)) - : std::min(countByRows, int64_t(colNum))); + const int64_t countByColumns = std::max(static_cast(colNum) - std::abs(shift), static_cast(0)); + const int64_t countByRows = std::max(static_cast(rowNum) - std::abs(shift), static_cast(0)); + const size_t onesPerBatchNum = + static_cast(shift > 0 ? std::min(countByColumns, static_cast(rowNum)) + : std::min(countByRows, static_cast(colNum))); const size_t dataShift = static_cast(shift >= 0 ? 
shift : -shift * colNum); if (spatialSize >= l2CacheSize) { @@ -112,8 +118,9 @@ void Eye::executeSpecified() { splitter(elementsCount, nthr, ithr, start, end); memset(dst + start, 0, (end - start) * sizeof(T)); }); - if (onesPerBatchNum == 0) + if (onesPerBatchNum == 0) { return; + } for (size_t bShift = 0; bShift < batchVolume * spatialCount; bShift += spatialCount) { parallel_nt(0, [&](const size_t ithr, const size_t nthr) { size_t start = 0, end = 0; @@ -128,8 +135,9 @@ void Eye::executeSpecified() { size_t start = 0, end = 0; splitter(batchVolume, nthr, ithr, start, end); memset(dst + start * spatialCount, 0, (end - start) * spatialSize); - if (onesPerBatchNum == 0) + if (onesPerBatchNum == 0) { return; + } for (size_t spShift = start * spatialCount; spShift < end * spatialCount; spShift += spatialCount) { for (size_t j = 0; j < onesPerBatchNum; j++) { dst[dataShift + j * (colNum + 1) + spShift] = static_cast(1); diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index f08657b7cc2f88..23551606407f52 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -143,10 +143,11 @@ struct jit_uni_binarization_kernel : public jit_uni_quantize_kernel, public jit_ shl(reg_src_32, i * step); or_(reg_bin_32, reg_src_32); } - if (isa == avx512_core) + if (isa == avx512_core) { mov(ptr[reg_to], reg_bin_16); - else + } else { mov(ptr[reg_to], reg_bin_8); + } add(reg_from, main_loop_step * sizeof(float)); add(reg_thresholds, main_loop_step * sizeof(float)); @@ -179,10 +180,11 @@ struct jit_uni_binarization_kernel : public jit_uni_quantize_kernel, public jit_ or_(reg_bin_32, reg_src_32); shl(reg_mask, 1); } - if (isa == avx512_core && tail_size > nbits) + if (isa == avx512_core && tail_size > nbits) { mov(ptr[reg_to], reg_bin_16); - else + } else { mov(ptr[reg_to], reg_bin_8); + } } } @@ -250,10 +252,11 @@ struct jit_uni_quantization_kernel : public 
jit_uni_quantize_kernel, public jit_ this->preamble(); - if (jqp_.is_planar) + if (jqp_.is_planar) { compute_planar(); - else + } else { compute_generic(); + } this->postamble(); } @@ -356,56 +359,74 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ inline void load_broadcasted_vectors_only(size_t idx) { const auto& broadcasted = jqp_.broadcasted; - if (broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) + if (broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) { uni_vbroadcastss(vmm_crop_low(idx), ptr[reg_crop_low]); - if (broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) + } + if (broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) { uni_vbroadcastss(vmm_crop_high(idx), ptr[reg_crop_high]); - if (broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) + } + if (broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) { uni_vbroadcastss(vmm_input_scale(idx), ptr[reg_input_scale]); - if (broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) + } + if (broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) { uni_vbroadcastss(vmm_input_shift(idx), ptr[reg_input_shift]); + } if (do_dequantization) { - if (broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) + if (broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) { uni_vbroadcastss(vmm_output_scale(idx), ptr[reg_output_scale]); - if (broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) + } + if (broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) { uni_vbroadcastss(vmm_output_shift(idx), ptr[reg_output_shift]); + } } } template inline void load_not_broadcasted_vectors_only(size_t idx, size_t offset) { const auto& broadcasted = jqp_.broadcasted; - if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) + if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) { uni_vmovups(T(vmm_crop_low(idx).getIdx()), ptr[reg_crop_low + offset]); - if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) + } + if 
(!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) { uni_vmovups(T(vmm_crop_high(idx).getIdx()), ptr[reg_crop_high + offset]); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) { uni_vmovups(T(vmm_input_scale(idx).getIdx()), ptr[reg_input_scale + offset]); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) { uni_vmovups(T(vmm_input_shift(idx).getIdx()), ptr[reg_input_shift + offset]); + } if (do_dequantization) { - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) { uni_vmovups(T(vmm_output_scale(idx).getIdx()), ptr[reg_output_scale + offset]); - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) { uni_vmovups(T(vmm_output_shift(idx).getIdx()), ptr[reg_output_shift + offset]); + } } } inline void increase_ptrs_if_not_broadcasted(size_t offset) { const auto& broadcasted = jqp_.broadcasted; - if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) + if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) { add(reg_crop_low, offset); - if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) { add(reg_crop_high, offset); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) { add(reg_input_scale, offset); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) { add(reg_input_shift, offset); + } if (do_dequantization) { - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) { add(reg_output_scale, offset); - if 
(!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) { add(reg_output_shift, offset); + } } } @@ -424,8 +445,9 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ mov(reg_output_shift, ptr[param + GET_OFF(output_shift)]); mov(reg_work_amount, ptr[param + GET_OFF(work_amount)]); - if (isa == cpu::x64::avx512_core) + if (isa == cpu::x64::avx512_core) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + } int simd_w = isa == cpu::x64::avx512_core ? 16 : 8; int tail_simd_w = 4; @@ -459,10 +481,12 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(vmm_val(i), vmm_val(i), vmm_crop_high(0)); uni_vmaxps(vmm_val(i), vmm_val(i), vmm_crop_low(0)); uni_vfmadd213ps(vmm_val(i), vmm_input_scale(0), vmm_input_shift(0)); - if (do_rounding) + if (do_rounding) { uni_vroundps(vmm_val(i), vmm_val(i), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(vmm_val(i), vmm_output_scale(0), vmm_output_shift(0)); + } store_vector(ptr[reg_to + i * (simd_w / 2) * dst_type_size], vmm_val(i), jqp_.dst_prc); } @@ -484,10 +508,12 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(xmm_val(0), xmm_val(0), xmm_crop_high(0)); uni_vmaxps(xmm_val(0), xmm_val(0), xmm_crop_low(0)); uni_vfmadd213ps(xmm_val(0), xmm_input_scale(0), xmm_input_shift(0)); - if (do_rounding) + if (do_rounding) { uni_vroundps(xmm_val(0), xmm_val(0), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(xmm_val(0), xmm_output_scale(0), xmm_output_shift(0)); + } store_vector(ptr[reg_to], xmm_val(0), jqp_.dst_prc); @@ -511,10 +537,12 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(xmm_val(0), xmm_val(0), xmm_crop_high(0)); uni_vmaxps(xmm_val(0), xmm_val(0), xmm_crop_low(0)); uni_vfmadd213ps(xmm_val(0), xmm_input_scale(0), xmm_input_shift(0)); - if 
(do_rounding) + if (do_rounding) { uni_vroundps(xmm_val(0), xmm_val(0), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(xmm_val(0), xmm_output_scale(0), xmm_output_shift(0)); + } store_scalar(ptr[aux_reg_to], xmm_val(0), jqp_.dst_prc); @@ -550,8 +578,9 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ mov(reg_block_size, ptr[param + GET_OFF(block_size)]); mov(reg_work_amount, ptr[param + GET_OFF(work_amount)]); - if (isa == cpu::x64::avx512_core) + if (isa == cpu::x64::avx512_core) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + } constexpr unsigned simd_w = isa == cpu::x64::avx512_core ? 16 : 8; constexpr unsigned tail8_simd_w = 8; @@ -591,10 +620,12 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(vmm_val(i), vmm_val(i), vmm_crop_high(i)); uni_vmaxps(vmm_val(i), vmm_val(i), vmm_crop_low(i)); uni_vfmadd213ps(vmm_val(i), vmm_input_scale(i), vmm_input_shift(i)); - if (do_rounding) + if (do_rounding) { uni_vroundps(vmm_val(i), vmm_val(i), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(vmm_val(i), vmm_output_scale(i), vmm_output_shift(i)); + } store_vector(ptr[reg_to + i * (simd_w / 2) * dst_type_size], vmm_val(i), jqp_.dst_prc); } @@ -628,10 +659,12 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(ymm_val(0), ymm_val(0), ymm_crop_high(0)); uni_vmaxps(ymm_val(0), ymm_val(0), ymm_crop_low(0)); uni_vfmadd213ps(ymm_val(0), ymm_input_scale(0), ymm_input_shift(0)); - if (do_rounding) + if (do_rounding) { uni_vroundps(ymm_val(0), ymm_val(0), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(ymm_val(0), ymm_output_scale(0), ymm_output_shift(0)); + } store_vector(ptr[aux_reg_to], ymm_val(0), jqp_.dst_prc); @@ -671,10 +704,12 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(xmm_val(0), xmm_val(0), xmm_crop_high(0)); 
uni_vmaxps(xmm_val(0), xmm_val(0), xmm_crop_low(0)); uni_vfmadd213ps(xmm_val(0), xmm_input_scale(0), xmm_input_shift(0)); - if (do_rounding) + if (do_rounding) { uni_vroundps(xmm_val(0), xmm_val(0), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(xmm_val(0), xmm_output_scale(0), xmm_output_shift(0)); + } store_vector(ptr[aux_reg_to], xmm_val(0), jqp_.dst_prc); @@ -710,19 +745,25 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ auto tail_unroll = [&](size_t iter) { const auto& broadcasted = jqp_.broadcasted; for (size_t i = 0; i < iter; i++) { - if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) + if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) { uni_vmovss(xmm_crop_low(0), ptr[reg_crop_low + i * wei_type_size]); - if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) { uni_vmovss(xmm_crop_high(0), ptr[reg_crop_high + i * wei_type_size]); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) { uni_vmovss(xmm_input_scale(0), ptr[reg_input_scale + i * wei_type_size]); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) { uni_vmovss(xmm_input_shift(0), ptr[reg_input_shift + i * wei_type_size]); + } if (do_dequantization) { - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) { uni_vmovss(xmm_output_scale(0), ptr[reg_output_scale + i * wei_type_size]); - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) { uni_vmovss(xmm_output_shift(0), ptr[reg_output_shift + i * wei_type_size]); + } } load_scalar(xmm_val(0), ptr[aux_reg_from + i * src_type_size], jqp_.src_prc); @@ -730,10 +771,12 @@ struct 
jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ uni_vminps(xmm_val(0), xmm_val(0), xmm_crop_high(0)); uni_vmaxps(xmm_val(0), xmm_val(0), xmm_crop_low(0)); uni_vfmadd213ps(xmm_val(0), xmm_input_scale(0), xmm_input_shift(0)); - if (do_rounding) + if (do_rounding) { uni_vroundps(xmm_val(0), xmm_val(0), 0); - if (do_dequantization) + } + if (do_dequantization) { uni_vfmadd213ps(xmm_val(0), xmm_output_scale(0), xmm_output_shift(0)); + } store_scalar(ptr[aux_reg_to + i * dst_type_size], xmm_val(0), jqp_.dst_prc); } @@ -1064,13 +1107,16 @@ FakeQuantize::FakeQuantize(const std::shared_ptr& op, const GraphConte const auto fq = ov::as_type_ptr(op); levels = fq->get_levels(); - if (levels <= 1) + if (levels <= 1) { THROW_CPU_NODE_ERR("supports 'levels' attribute greater than or equal to 2"); + } - if (inputShapes.size() != 5) + if (inputShapes.size() != 5) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", inputShapes.size()); - if (outputShapes.size() != 1) + } + if (outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", outputShapes.size()); + } auto initAxisIdx = [&](const VectorDims& inputDims) { size_t axisIdx = 0; @@ -1189,13 +1235,15 @@ FakeQuantize::FakeQuantize(const std::shared_ptr& op, const GraphConte } } else { auto allElementsAreEqual = [&](const std::vector& data, size_t size) { - if (size == 0) + if (size == 0) { return true; + } auto first = data[0]; for (size_t i = 1; i < size; i++) { - if (data[i] != first) + if (data[i] != first) { return false; + } } return true; @@ -1248,18 +1296,19 @@ FakeQuantize::FakeQuantize(const std::shared_ptr& op, const GraphConte inputScaleSize, inputShiftSize, outputScaleSize, - outputShiftSize)) + outputShiftSize)) { broadcastingPolicy = PerTensor; - else if (one_of(1u, - cropLowSize, - cropHighSize, - inputScaleSize, - inputShiftSize, - outputScaleSize, - outputShiftSize)) + } else if (one_of(1u, + cropLowSize, + cropHighSize, + inputScaleSize, + 
inputShiftSize, + outputScaleSize, + outputShiftSize)) { broadcastingPolicy = Mixed; - else + } else { broadcastingPolicy = PerChannel; + } bool quantizationOnly = true; @@ -1317,8 +1366,9 @@ FakeQuantize::FakeQuantize(const std::shared_ptr& op, const GraphConte outputScale[i] = (oh - ol) / (levels - 1); #endif - if (outputScale[i] != 1.f) + if (outputScale[i] != 1.f) { quantizationOnly = false; + } } for (size_t i = 0; i < outputShift.size(); i++) { @@ -1326,8 +1376,9 @@ FakeQuantize::FakeQuantize(const std::shared_ptr& op, const GraphConte outputShift[i] = ol; - if (outputShift[i] != 0.f) + if (outputShift[i] != 0.f) { quantizationOnly = false; + } } bool isFakeQuantization = true; @@ -1399,20 +1450,24 @@ void FakeQuantize::init() { outputPrecision = getOriginalOutputPrecisionAtPort(0); if (inputPrecision != ov::element::f32 && inputPrecision != ov::element::u8 && - inputPrecision != ov::element::i8) + inputPrecision != ov::element::i8) { inputPrecision = ov::element::f32; + } if (outputPrecision != ov::element::f32 && outputPrecision != ov::element::u8 && - outputPrecision != ov::element::i8) + outputPrecision != ov::element::i8) { outputPrecision = ov::element::f32; + } } } void FakeQuantize::getSupportedDescriptors() { - if (getParentEdges().size() != 5) + if (getParentEdges().size() != 5) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } if (getInputShapeAtPort(0).getRank() != getOutputShapeAtPort(0).getRank()) { THROW_CPU_NODE_ERR("has different ranks for input and output tensors"); @@ -1425,16 +1480,19 @@ void FakeQuantize::getSupportedDescriptors() { } if (getAxis() != 1) { - if (isBinarization()) + if (isBinarization()) { THROW_CPU_NODE_ERR("doesn't support non per-tensor binarization for axis: ", getAxis()); - if (getAxis() != 0) + } + if (getAxis() != 0) { 
THROW_CPU_NODE_ERR("doesn't support non per-tensor quantization for axis: ", getAxis()); + } } } void FakeQuantize::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_core)) { @@ -1494,8 +1552,9 @@ void FakeQuantize::initSupportedPrimitiveDescriptors() { bool FakeQuantize::needPrepareParams() const { if (isBinarization()) { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { OPENVINO_THROW("CPU quantize node with name '", getName(), "' doesn't have primitive descriptors."); + } if (internalBlobMemory.empty() || (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref && inputShapesModified())) { @@ -1570,8 +1629,9 @@ void FakeQuantize::prepareParams() { void FakeQuantize::createPrimitive() { Node::createPrimitive(); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) + if (!selectedPrimitiveDescriptor) { OPENVINO_THROW("CPU quantize node with name '", getName(), "' doesn't have primitive descriptors."); + } if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) { const auto& config = getSelectedPrimitiveDescriptor()->getConfig(); @@ -1596,18 +1656,24 @@ void FakeQuantize::createPrimitive() { : srcDesc.hasLayoutType(LayoutType::nCsp8c) ? 
8 : 1; if (paddedSize != 1) { - if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) + if (!broadcasted[static_cast(FQ_add_input_type::CROP_LOW)]) { cropLow.resize(rnd_up(cropLow.size(), paddedSize)); - if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::CROP_HIGH)]) { cropHigh.resize(rnd_up(cropHigh.size(), paddedSize)); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SCALE)]) { inputScale.resize(rnd_up(inputScale.size(), paddedSize)); - if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::INPUT_SHIFT)]) { inputShift.resize(rnd_up(inputShift.size(), paddedSize)); - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SCALE)]) { outputScale.resize(rnd_up(outputScale.size(), paddedSize)); - if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) + } + if (!broadcasted[static_cast(FQ_add_input_type::OUTPUT_SHIFT)]) { outputShift.resize(rnd_up(outputShift.size(), paddedSize)); + } } key.jqp.broadcasted = broadcasted; @@ -1675,7 +1741,7 @@ void FakeQuantize::executeReference() { uint32_t res = (val > thr) ? 0xffffffff : 0x00000000; - auto bit = uint8_t(res == out_mask); + auto bit = static_cast(res == out_mask); bin_val |= (bit << shift); } @@ -1762,7 +1828,7 @@ void FakeQuantize::executeBinarization(const std::unique_ptr(C); (*pKernel)(&arg); }); @@ -1833,10 +1899,10 @@ void FakeQuantize::executeQuantization(const std::unique_ptr(FQ_add_input_type::OUTPUT_SHIFT)] ? 
&outputShift[0] : &outputShift[c]; - arg.src_step = (size_t)blk_size * src_type_size; - arg.dst_step = (size_t)blk_size * dst_type_size; - arg.block_size = (size_t)blk_size; - arg.work_amount = (size_t)H; + arg.src_step = static_cast(blk_size) * src_type_size; + arg.dst_step = static_cast(blk_size) * dst_type_size; + arg.block_size = static_cast(blk_size); + arg.work_amount = static_cast(H); (*pKernel)(&arg); }); @@ -1868,10 +1934,12 @@ void FakeQuantize::executeQuantization(const std::unique_ptr(FQ_add_input_type::OUTPUT_SHIFT)] ? &outputShift[0] : &outputShift[c]; - arg.src_step = is_blk_format ? (size_t)blk_size * src_type_size : (size_t)C * src_type_size; - arg.dst_step = is_blk_format ? (size_t)blk_size * dst_type_size : (size_t)C * dst_type_size; - arg.block_size = is_blk_format ? (size_t)blk_size : nstl::min(blk_size, C - c); - arg.work_amount = (size_t)std::min(static_cast(batch_size), H * W - b * batch_size); + arg.src_step = + is_blk_format ? static_cast(blk_size) * src_type_size : static_cast(C) * src_type_size; + arg.dst_step = + is_blk_format ? static_cast(blk_size) * dst_type_size : static_cast(C) * dst_type_size; + arg.block_size = is_blk_format ? static_cast(blk_size) : nstl::min(blk_size, C - c); + arg.work_amount = static_cast(std::min(static_cast(batch_size), H * W - b * batch_size)); (*pKernel)(&arg); }); @@ -1900,10 +1968,13 @@ void FakeQuantize::executeQuantization(const std::unique_ptr(FQ_add_input_type::OUTPUT_SHIFT)] ? &outputShift[0] : &outputShift[c]; - arg.src_step = is_blk_format ? (size_t)blk_size * src_type_size : (size_t)C * src_type_size; - arg.dst_step = is_blk_format ? (size_t)blk_size * dst_type_size : (size_t)C * dst_type_size; - arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t)blk_size : nstl::min(blk_size, C - c); - arg.work_amount = (size_t)W; + arg.src_step = + is_blk_format ? static_cast(blk_size) * src_type_size : static_cast(C) * src_type_size; + arg.dst_step = + is_blk_format ? 
static_cast(blk_size) * dst_type_size : static_cast(C) * dst_type_size; + arg.block_size = + (is_blk_format && srcDims.size() != 2) ? static_cast(blk_size) : nstl::min(blk_size, C - c); + arg.work_amount = static_cast(W); (*pKernel)(&arg); }); @@ -1924,8 +1995,9 @@ void FakeQuantize::execute(const dnnl::stream& strm) { } void FakeQuantize::initializePostOpData(const VectorDims& dims, const size_t bufferAlignment, bool doRounding) { - if (postOpDataVersion == parameterVersion) + if (postOpDataVersion == parameterVersion) { return; + } if (getAlgorithm() == Algorithm::FQBinarization) { const auto realAxisSize = dims[dims.size() > 1 ? 1 : 0]; @@ -1953,8 +2025,9 @@ void FakeQuantize::initializePostOpData(const VectorDims& dims, const size_t buf } void FakeQuantize::initializePostOpDataLegacy(const VectorDims& dims, const size_t bufferAlignment) { - if (legacyPostOpDataVersion == parameterVersion) + if (legacyPostOpDataVersion == parameterVersion) { return; + } if (getAlgorithm() == Algorithm::FQBinarization) { const auto realAxisSize = dims[dims.size() > 1 ? 1 : 0]; @@ -2026,7 +2099,7 @@ void FakeQuantize::appendPostOpsImpl(dnnl::post_ops& ops, const VectorDims& post if (getAlgorithm() == Algorithm::FQBinarization) { ops.append_binarization(dnnl::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], - (const float*)&binarizationOutputMask[0]); + reinterpret_cast(&binarizationOutputMask[0])); } else { dnnl::algorithm alg = getAlgorithm() == Algorithm::FQQuantization ? 
dnnl::algorithm::quantization_quantize @@ -2096,12 +2169,14 @@ void FakeQuantize::appendPostOps(dnnl::post_ops& ops, static float roundHalfToEven(float f) { const float RHAFZ = std::round(f); // r is round-half-away-from-zero const float d = RHAFZ - f; // f + d -> RHAFZ - if ((d != 0.5f) && (d != -0.5f)) + if ((d != 0.5f) && (d != -0.5f)) { return RHAFZ; + } // already even +/-1.5 -> +/-2 - if (std::fmod(RHAFZ, 2.0f) == 0.0f) + if (std::fmod(RHAFZ, 2.0f) == 0.0f) { return RHAFZ; + } // +/-2.5 -> +/-3, but we need it to to +/-2 // RHAFZ (f+d) goes the wrong way, should be (f-d) @@ -2139,8 +2214,9 @@ void FakeQuantize::updateOptimizedFormula(bool do_rounding) { // per-channel FQ. if (isPerTensor(inputShift, inputShift[0], 0.00005f)) { f.ish.resize(OC); - for (auto& v : f.ish) + for (auto& v : f.ish) { v = inputShift[0]; + } } else { f.ish = inputShift; } @@ -2150,14 +2226,18 @@ void FakeQuantize::updateOptimizedFormula(bool do_rounding) { f.osc = outputScale; f.osh = outputShift; - if (f.clo.size() == 1) + if (f.clo.size() == 1) { f.clo.resize(OC, f.clo[0]); - if (f.chi.size() == 1) + } + if (f.chi.size() == 1) { f.chi.resize(OC, f.chi[0]); - if (f.isc.size() == 1) + } + if (f.isc.size() == 1) { f.isc.resize(OC, f.isc[0]); - if (f.ish.size() == 1) + } + if (f.ish.size() == 1) { f.ish.resize(OC, f.ish[0]); + } for (size_t i = 0; i < OC; i++) { auto& clo = f.clo[i]; @@ -2169,16 +2249,18 @@ void FakeQuantize::updateOptimizedFormula(bool do_rounding) { clo = roundHalfToEven(clo * isc + ish); chi = roundHalfToEven(chi * isc + ish); - if (clo > chi) + if (clo > chi) { std::swap(clo, chi); + } if (!do_rounding) { // when no rounding is needed, outputScale/outputShift can be // merged with inputScale/inputShift with updated cropLow/cropHigh clo = clo * osc + osh; chi = chi * osc + osh; - if (clo > chi) + if (clo > chi) { std::swap(clo, chi); + } // crop(x*isc + ish, a, b)*osc + osh // crop(x*isc*osc + ish*osc + osh, a', b') @@ -2274,24 +2356,30 @@ bool 
FakeQuantize::appendAttrPostOps(DnnlPostOpsComposerLegacy& dnnlpoc, // return false before committing any change to DnnlPostOpsComposer if (!allowBinary) { - if (f.ish.size() > 1) + if (f.ish.size() > 1) { return false; + } if (!skipRoundClipOutputLinear) { - if (f.clo.size() > 1 || f.chi.size() > 1) + if (f.clo.size() > 1 || f.chi.size() > 1) { return false; - if (f.osc.size() > 1 || f.osh.size() > 1) + } + if (f.osc.size() > 1 || f.osh.size() > 1) { return false; + } } } - if (!dnnlpoc.appendLinear(f.isc, f.ish, isLastPostOp && skipRoundClipOutputLinear, allowBinary)) + if (!dnnlpoc.appendLinear(f.isc, f.ish, isLastPostOp && skipRoundClipOutputLinear, allowBinary)) { return false; + } - if (skipRoundClipOutputLinear) + if (skipRoundClipOutputLinear) { return true; + } - if (doRounding) + if (doRounding) { dnnlpoc.appendRoundHTE(); + } dnnlpoc.appendClip(f.clo, f.chi); dnnlpoc.appendLinear(f.osc, f.osh, isLastPostOp, allowBinary); return true; @@ -2301,20 +2389,23 @@ FakeQuantize::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantiz #if defined(OPENVINO_ARCH_X86_64) bool isBinarization = _jqp.op_type == Algorithm::FQBinarization; if (mayiuse(cpu::x64::avx512_core)) { - if (isBinarization) + if (isBinarization) { pKernel.reset(new jit_uni_binarization_kernel(_jqp)); - else + } else { pKernel.reset(new jit_uni_quantization_kernel(_jqp)); + } } else if (mayiuse(cpu::x64::avx2)) { - if (isBinarization) + if (isBinarization) { pKernel.reset(new jit_uni_binarization_kernel(_jqp)); - else + } else { pKernel.reset(new jit_uni_quantization_kernel(_jqp)); + } } else if (mayiuse(cpu::x64::sse41)) { - if (isBinarization) + if (isBinarization) { pKernel.reset(new jit_uni_binarization_kernel(_jqp)); - else + } else { pKernel.reset(new jit_uni_quantization_kernel(_jqp)); + } } else { OPENVINO_THROW("Can't create jit fake quantize kernel"); } @@ -2325,8 +2416,9 @@ FakeQuantize::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantiz } void 
FakeQuantize::FakeQuantizeJitExecutor::exec(const FakeQuantize& node) { - if (!pKernel) + if (!pKernel) { OPENVINO_THROW("Can't execute, kernel for fake quantize node is not compiled"); + } if (pKernel->jqp_.op_type == Algorithm::FQBinarization) { node.executeBinarization(pKernel); diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index bf215a9522a595..16c8c1b662efc4 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -118,11 +118,13 @@ bool FullyConnected::isSupportedCompressedOperation(const std::shared_ptr& op, const GraphC : Node(op, context, FCShapeInferFactory(op)) { std::string errorMessage; initTensorParallelConfig(context); - if (!isSupportedOperation(op, errorMessage)) + if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + } m_atoi[ARG_SRC] = DATA; m_atoi[ARG_WEI] = WEIGHTS; @@ -454,12 +457,14 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput, return false; } - if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) + if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) { return false; + } const auto constNode = std::dynamic_pointer_cast(weightsInput); - if (!constNode) + if (!constNode) { return false; + } const auto weiMemory = constNode->getMemoryPtr(); OPENVINO_ASSERT(weiMemory, "Cannot get const blob"); @@ -516,8 +521,9 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { const auto& srcTypes = getOriginalInputPrecisions(); auto dstTypes = getOriginalOutputPrecisions(); // @todo graph optimizer should update original output precisions instead - if (!fusedWith.empty()) + if (!fusedWith.empty()) { dstTypes = fusedWith.back()->getOriginalOutputPrecisions(); + } VecMemoryDescs srcDescs; const auto& creatorsMap = BlockedDescCreator::getCommonCreators(); diff --git 
a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index f349990f56f620..fa763805e9ccc9 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -77,8 +77,9 @@ Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr& co const auto& idxShape = getInputShapeAtPort(GATHER_INDICES); isIdxShapeStat = idxShape.isStatic(); const auto indicesRank = idxShape.getRank(); - if (dataSrcRank == 0lu || indicesRank == 0lu) + if (dataSrcRank == 0lu || indicesRank == 0lu) { THROW_CPU_NODE_ERR("has incorrect input parameters ranks."); + } if (ov::is_type(op)) { batchDims = static_cast(ov::as_type_ptr(op)->get_batch_dims()); @@ -87,10 +88,11 @@ Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr& co // and sets the dontReverseIndices flag. const auto& rti = op->get_rt_info(); const auto& reverse = rti.find("dontReverseIndices"); - if (reverse == rti.end()) + if (reverse == rti.end()) { reverseIndexing = true; - else + } else { reverseIndexing = false; + } } else if (ov::is_type(op)) { batchDims = static_cast(ov::as_type_ptr(op)->get_batch_dims()); reverseIndexing = false; @@ -99,18 +101,22 @@ Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr& co reverseIndexing = true; } - if (batchDims < 0) + if (batchDims < 0) { batchDims += indicesRank; - if (batchDims < 0 || batchDims > std::min(static_cast(dataSrcRank), static_cast(indicesRank))) + } + if (batchDims < 0 || batchDims > std::min(static_cast(dataSrcRank), static_cast(indicesRank))) { THROW_CPU_NODE_ERR("has incorrect batch_dims ", batchDims, "!"); + } if (ov::is_type(op->get_input_node_ptr(GATHER_AXIS))) { isAxisInputConst = true; axis = ov::as_type(op->get_input_node_ptr(GATHER_AXIS))->cast_vector()[0]; - if (axis < 0) + if (axis < 0) { axis += dataSrcRank; - if (axis < 0 || axis >= dataSrcRank || batchDims > axis) + } + if (axis < 0 || axis >= dataSrcRank || batchDims > axis) { 
THROW_CPU_NODE_ERR("has incorrect input parameter axis value: ", axis); + } } if (auto indices = ov::as_type(op->get_input_node_ptr(GATHER_INDICES))) { @@ -119,8 +125,9 @@ Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr& co } void Gather::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } dataTypeSize = getOriginalInputPrecisionAtPort(GATHER_DATA).size(); @@ -329,20 +336,24 @@ bool Gather::needPrepareParams() const { return false; } bool result = inputShapesModified(); - if (!isAxisInputConst) + if (!isAxisInputConst) { result = result || axis != (getSrcDataAtPortAs(GATHER_AXIS))[0]; + } return result; } void Gather::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(GATHER_DATA); - if (!dataMemPtr || !dataMemPtr->isDefined()) + if (!dataMemPtr || !dataMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input data memory."); + } auto idxMemPtr = getSrcMemoryAtPort(GATHER_INDICES); - if (!idxMemPtr || !idxMemPtr->isDefined()) + if (!idxMemPtr || !idxMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input indices memory."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); + } // short 1D vector fast execution impl (typical in shape infer subgraph) canOptimize1DCase = false; @@ -358,10 +369,12 @@ void Gather::prepareParams() { if (!isAxisInputConst) { axis = (getSrcDataAtPortAs(GATHER_AXIS))[0]; - if (axis < 0) + if (axis < 0) { axis += dataSrcRank; - if (axis < 0 || axis >= dataSrcRank || batchDims > axis) + } + if (axis < 0 || axis >= dataSrcRank || batchDims > axis) { THROW_CPU_NODE_ERR("has incorrect input parameter axis value: ", axis); + } } if (!isDataShapeStat || !isAxisInputConst) { @@ -525,14 +538,16 @@ void Gather::executeDynamicImpl(const dnnl::stream& strm) { int remainder = idxElPerVec % 
specIndicesSize; for (uint64_t i = 1; i < idxElPerVec; i++) { permIdxMask[i] = permIdxMask[i - 1] + 1; - if (static_cast(permIdxMask[i]) == idxElPerVec) + if (static_cast(permIdxMask[i]) == idxElPerVec) { permIdxMask[i] = idxElPerVec - specIndicesSize; + } } for (uint64_t i = 0; i < idxElPerVec; i++) { - if (((start + i) % specIndicesSize) < (specIndicesSize - remainder)) + if (((start + i) % specIndicesSize) < (specIndicesSize - remainder)) { beforeAxisDiff[i] = axisDim * div; - else + } else { beforeAxisDiff[i] = axisDim * (div + 1); + } } arg.permIdxMask = permIdxMask; arg.beforeAxisDiff = beforeAxisDiff; @@ -550,13 +565,15 @@ void Gather::executeDynamicImpl(const dnnl::stream& strm) { } void Gather::initShortParams(threadExecParams& p, const uint64_t start) { - if (!jitKernel) + if (!jitKernel) { THROW_CPU_NODE_ERR("has uninitialized kernel in function initShortParams."); + } const uint64_t idxElPerVec = jitKernel->getIdxElPerVec(); if (afterAxisSize == 1) { // Elementwise gather. - if (specIndicesSize >= idxElPerVec) + if (specIndicesSize >= idxElPerVec) { return; // Is not a short case. + } p.permIdxMask.resize(idxElPerVec); p.srcBeforeAxisDiff.resize(idxElPerVec); @@ -564,8 +581,9 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) { p.permIdxMask[0] = idxElPerVec - specIndicesSize; for (uint64_t i = 1; i < idxElPerVec; i++) { p.permIdxMask[i] = p.permIdxMask[i - 1] + 1; - if (static_cast(p.permIdxMask[i]) == idxElPerVec) + if (static_cast(p.permIdxMask[i]) == idxElPerVec) { p.permIdxMask[i] = idxElPerVec - specIndicesSize; + } } const int div = idxElPerVec / specIndicesSize; @@ -578,8 +596,9 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) { } } } else { // Blocked gather. - if (afterAxisSize > idxElPerVec) + if (afterAxisSize > idxElPerVec) { return; // Is not a short case. 
+ } p.afterAxIdxInBytes.resize(idxElPerVec); p.afterAxPermMask.resize(idxElPerVec); @@ -592,8 +611,9 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) { p.afterAxIdxInBytes[i] = (start + i) % afterAxisSize; p.specIdxDiff[i] = (((secondStart + i) / afterAxisSize) % specIndicesSize) * idxTypeSize - p.specIdxInBytes[i]; - if (p.specIdxDiff[i] < 0) + if (p.specIdxDiff[i] < 0) { p.specIdxDiff[i] += specIndicesSize * idxTypeSize; + } p.srcBeforeAxisDiff[i] = ((start + i + idxElPerVec) / (specIndicesSize * afterAxisSize)) * axisAndAfterAxisSizeInBytes - ((start + i) / (specIndicesSize * afterAxisSize)) * axisAndAfterAxisSizeInBytes; @@ -601,16 +621,18 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) { p.afterAxIdxInBytes[i] *= dataTypeSize; p.afterAxPermMask[i] = idxElPerVec - afterAxisSize + i; for (size_t j = 0lu; j < 6lu; j++) { - if (static_cast(p.afterAxPermMask[i]) >= idxElPerVec) + if (static_cast(p.afterAxPermMask[i]) >= idxElPerVec) { p.afterAxPermMask[i] -= afterAxisSize; + } } } if (specIndicesSize * afterAxisSize < idxElPerVec) { p.beforeAxPermMask[0] = idxElPerVec - specIndicesSize * afterAxisSize; for (uint64_t i = 1; i < idxElPerVec; i++) { p.beforeAxPermMask[i] = p.beforeAxPermMask[i - 1] + 1; - if (static_cast(p.beforeAxPermMask[i]) == idxElPerVec) + if (static_cast(p.beforeAxPermMask[i]) == idxElPerVec) { p.beforeAxPermMask[i] = idxElPerVec - specIndicesSize * afterAxisSize; + } } } @@ -633,10 +655,11 @@ void Gather::execCompressed4Bit() { parallel_for2d(beforeBatchSize, specIndicesSize, [&](const size_t b, const size_t j) { int ii = srcIndices[b * specIndicesSize + j]; if (ii < 0) { - if (reverseIndexing) + if (reverseIndexing) { ii += axisDim; - else + } else { ii = axisDim; + } } const size_t idx = ii; const size_t c2 = dstAfterBatchSize * b + afterAxisSize * j; @@ -693,8 +716,9 @@ void Gather::execCompressed4Bit() { } else { for (size_t i = 0; i < betweenBatchAndAxisSize; i++) { size_t dstIdx = c2 + 
specIdxAndAfterAxSize * i; - for (size_t p = 0; p < afterAxisSize; p++) + for (size_t p = 0; p < afterAxisSize; p++) { dstData[dstIdx] = 0; + } } } }); @@ -716,10 +740,11 @@ void Gather::execCompressed8Bit() { parallel_for2d(beforeBatchSize, specIndicesSize, [&](const size_t b, const size_t j) { int ii = srcIndices[b * specIndicesSize + j]; if (ii < 0) { - if (reverseIndexing) + if (reverseIndexing) { ii += axisDim; - else + } else { ii = axisDim; + } } const size_t idx = ii; const size_t c2 = dstAfterBatchSize * b + afterAxisSize * j; @@ -775,8 +800,9 @@ void Gather::execCompressed8Bit() { } else { for (size_t i = 0; i < betweenBatchAndAxisSize; i++) { size_t dstIdx = c2 + specIdxAndAfterAxSize * i; - for (size_t p = 0; p < afterAxisSize; p++) + for (size_t p = 0; p < afterAxisSize; p++) { dstData[dstIdx] = 0; + } } } }); @@ -871,10 +897,11 @@ void Gather::execReference() { parallel_for2d(beforeBatchSize, specIndicesSize, [&](const size_t b, const size_t j) { int ii = srcIndices[b * specIndicesSize + j]; if (ii < 0) { - if (reverseIndexing) + if (reverseIndexing) { ii += axisDim; - else + } else { ii = axisDim; + } } const size_t idx = ii; const size_t c2 = dstAfterBatchSize * b + afterAxisSizeInBytes * j; @@ -908,10 +935,11 @@ void Gather::exec1DCase() { for (size_t i = 0; i < idxCnt; i++) { auto ii = pidx[i]; if (ii < 0) { - if (reverseIndexing) + if (reverseIndexing) { ii += axisDim; - else + } else { ii = axisDim; + } } pdst[i] = psrc[ii]; } @@ -932,8 +960,9 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { } auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } constexpr size_t outputPort = 0; auto& config = selected_pd->getConfig(); diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp index 29bc32370d03de..7c282ef863a314 100644 --- 
a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp @@ -37,20 +37,24 @@ GatherElements::GatherElements(const std::shared_ptr& op, const GraphC if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 2 || outputShapes.size() != 1) + if (inputShapes.size() != 2 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has invalid number of input/output edges."); + } const auto dataRank = getInputShapeAtPort(dataIndex_).getRank(); const auto indicesRank = getInputShapeAtPort(indicesIndex_).getRank(); - if (dataRank != indicesRank) + if (dataRank != indicesRank) { THROW_CPU_NODE_ERR("has invalid input shapes. Inputs 'Data' and 'Indices' must have equal ranks."); + } auto gatherElementsOp = ov::as_type_ptr(op); auto axis = gatherElementsOp->get_axis(); - if (axis < 0) + if (axis < 0) { axis += dataRank; - if (axis < 0 || axis >= static_cast(dataRank)) + } + if (axis < 0 || axis >= static_cast(dataRank)) { THROW_CPU_NODE_ERR("has invalid axis attribute: ", axis); + } axis_ = axis; } @@ -58,20 +62,23 @@ void GatherElements::prepareParams() { const auto& dataDims = getParentEdgeAt(dataIndex_)->getMemory().getStaticDims(); const auto& dstDims = getChildEdgeAt(0)->getMemory().getStaticDims(); strideAxDst_ = 1; - for (size_t i = dstDims.size() - 1; i > axis_; i--) + for (size_t i = dstDims.size() - 1; i > axis_; i--) { strideAxDst_ *= dstDims[i]; + } dstAxDim_ = dstDims[axis_]; if (axis_ > 0) { strideAx1Diff_ = 1; - for (size_t i = dataDims.size() - 1; i >= axis_; i--) + for (size_t i = dataDims.size() - 1; i >= axis_; i--) { strideAx1Diff_ *= dataDims[i]; + } strideAx1Diff_ -= strideAxDst_ * dstDims[axis_]; } } void GatherElements::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inDataPrecision = getOriginalInputPrecisionAtPort(dataIndex_); if 
(!one_of(inDataPrecision.size(), @@ -107,8 +114,9 @@ void GatherElements::directExecution() { auto threadBody = [&](const int ithr, const int nthr) { int start(0lu), end(0lu); splitter(outSize, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } int axStrideIt = start % strideAxDst_; int dstAxIdx = (start / strideAxDst_) % dstAxDim_; diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 8df99882adc9cf..a433ae40fa2479 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -40,8 +40,9 @@ GatherND::GatherND(const std::shared_ptr& op, const GraphContext::CPtr OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 2 && outputShapes.size() != 1) + if (inputShapes.size() != 2 && outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has invalid number of input/output edges."); + } const size_t dataInputRank = getInputShapeAtPort(GATHERND_DATA).getRank(); const size_t indicesInputRank = getInputShapeAtPort(GATHERND_INDEXES).getRank(); @@ -53,13 +54,15 @@ GatherND::GatherND(const std::shared_ptr& op, const GraphContext::CPtr } else { THROW_CPU_NODE_ERR("has support only opset5."); } - if (attrs.batchDims >= std::min(dataInputRank, indicesInputRank)) + if (attrs.batchDims >= std::min(dataInputRank, indicesInputRank)) { THROW_CPU_NODE_ERR("has invalid batch_dims attribute: ", attrs.batchDims); + } } void GatherND::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inDataPrecision = getOriginalInputPrecisionAtPort(GATHERND_DATA); if (!one_of(inDataPrecision.size(), @@ -90,14 +93,18 @@ void GatherND::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(GATHERND_DATA); auto idxMemPtr = getSrcMemoryAtPort(GATHERND_INDEXES); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isDefined()) + if 
(!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'data'."); - if (!idxMemPtr || !idxMemPtr->isDefined()) + } + if (!idxMemPtr || !idxMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'indices'."); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined output memory."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); + } attrs.srcDims = srcMemPtr->getStaticDims(); attrs.srcStrides = srcMemPtr->getDescWithType()->getStrides(); @@ -109,25 +116,26 @@ void GatherND::prepareParams() { GatherND::GatherNDExecutor::GatherNDExecutor(const GatherNDAttributes& attrs) : batchSize(std::accumulate(attrs.srcDims.begin(), attrs.srcDims.begin() + attrs.batchDims, - size_t(1), + static_cast(1), std::multiplies())), dataSize(attrs.dataSize), sliceRank(attrs.sliceRank), dataLength(std::accumulate(attrs.srcDims.begin() + sliceRank + attrs.batchDims, attrs.srcDims.end(), - size_t(1), + static_cast(1), std::multiplies())), cycles(attrs.dstElementCount / (dataLength * batchSize)), workAmount(batchSize * cycles), srcBatchStride(std::accumulate(attrs.srcDims.begin() + attrs.batchDims, attrs.srcDims.end(), - size_t(1), + static_cast(1), std::multiplies())), idxBatchStride(cycles * sliceRank), dstBatchStride(cycles * dataLength) { srcShifts.resize(attrs.sliceRank, 0); - for (size_t i = 0; i < attrs.sliceRank; i++) + for (size_t i = 0; i < attrs.sliceRank; i++) { srcShifts[i] = attrs.srcStrides[i + attrs.batchDims] * (dataLength > 1 ? 
dataSize : 1); + } // optimized implementation 'blocks' via memcpy if (dataLength > 1) { @@ -138,8 +146,9 @@ GatherND::GatherNDExecutor::GatherNDExecutor(const GatherNDAttributes& attrs) } void GatherND::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("has not compiled executor."); + } execPtr->exec(getSrcMemoryAtPort(GATHERND_DATA), getSrcMemoryAtPort(GATHERND_INDEXES), getDstMemoryAtPort(0)); } @@ -175,8 +184,9 @@ void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, parallel_nt(0, [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(workAmount, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } size_t bStart = start / cycles; size_t cStart = start % cycles; size_t workCounter = start; @@ -188,8 +198,9 @@ void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, for (size_t b = bStart; b < batchSize; b++) { for (size_t j = cStart; j < cycles; j++) { size_t dataIdx = 0lu; - for (size_t i = 0; i < sliceRank; i++) + for (size_t i = 0; i < sliceRank; i++) { dataIdx += srcShifts[i] * shiftedIndices[i]; + } cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataLength); shiftedDstData += dataLength; shiftedIndices += sliceRank; @@ -214,8 +225,9 @@ void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, parallel_nt(0, [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); splitter(workAmount, nthr, ithr, start, end); - if (start >= end) + if (start >= end) { return; + } size_t bStart = start / cycles; size_t cStart = start % cycles; size_t workCounter = start; @@ -227,8 +239,9 @@ void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, for (size_t b = bStart; b < batchSize; b++) { for (size_t j = cStart; j < cycles; j++) { size_t dataIdx = 0lu; - for (size_t i = 0lu; i < sliceRank; i++) + for (size_t i = 0lu; i < sliceRank; i++) { dataIdx += srcShifts[i] * shiftedIndices[i]; + } 
shiftedDstData[0] = shiftedSrcData[dataIdx]; shiftedDstData++; shiftedIndices += sliceRank; diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index 12b0754d0fb869..526659ac7189b7 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -36,28 +36,36 @@ GatherTree::GatherTree(const std::shared_ptr& op, const GraphContext:: OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 4) + if (inputShapes.size() != 4) { THROW_CPU_NODE_ERR("has incorrect number of input edges."); - if (outputShapes.size() != 1) + } + if (outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } - if (getInputShapeAtPort(GATHER_TREE_STEP_IDX).getRank() != 3) + if (getInputShapeAtPort(GATHER_TREE_STEP_IDX).getRank() != 3) { THROW_CPU_NODE_ERR("step_idx vector should be 3 dimension"); - if (getInputShapeAtPort(GATHER_TREE_PARENT_IDX).getRank() != 3) + } + if (getInputShapeAtPort(GATHER_TREE_PARENT_IDX).getRank() != 3) { THROW_CPU_NODE_ERR("parent_idx vector should be 3 dimension"); - if (getInputShapeAtPort(GATHER_TREE_MAX_SEQ_LEN).getRank() != 1) + } + if (getInputShapeAtPort(GATHER_TREE_MAX_SEQ_LEN).getRank() != 1) { THROW_CPU_NODE_ERR("max_seq_len vector should be 1 dimension"); - if (!is_scalar(op->get_input_partial_shape(GATHER_TREE_END_TOKEN))) + } + if (!is_scalar(op->get_input_partial_shape(GATHER_TREE_END_TOKEN))) { THROW_CPU_NODE_ERR("end_token should be scalar"); + } } void GatherTree::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } precision = getOriginalInputPrecisionAtPort(GATHER_TREE_STEP_IDX); - if (!one_of(precision, ov::element::f32, ov::element::i32)) + if (!one_of(precision, ov::element::f32, ov::element::i32)) { precision = ov::element::f32; + } if (getOriginalInputPrecisionAtPort(GATHER_TREE_PARENT_IDX) 
!= precision || getOriginalInputPrecisionAtPort(GATHER_TREE_MAX_SEQ_LEN) != precision || @@ -75,21 +83,23 @@ void GatherTree::initSupportedPrimitiveDescriptors() { } void GatherTree::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("has not compiled executor."); + } - if (precision == ov::element::f32) + if (precision == ov::element::f32) { execPtr->exec(getSrcMemoryAtPort(GATHER_TREE_STEP_IDX), getSrcMemoryAtPort(GATHER_TREE_PARENT_IDX), getSrcMemoryAtPort(GATHER_TREE_MAX_SEQ_LEN), getSrcMemoryAtPort(GATHER_TREE_END_TOKEN), getDstMemoryAtPort(0)); - else + } else { execPtr->exec(getSrcMemoryAtPort(GATHER_TREE_STEP_IDX), getSrcMemoryAtPort(GATHER_TREE_PARENT_IDX), getSrcMemoryAtPort(GATHER_TREE_MAX_SEQ_LEN), getSrcMemoryAtPort(GATHER_TREE_END_TOKEN), getDstMemoryAtPort(0)); + } } void GatherTree::prepareParams() { @@ -98,16 +108,21 @@ void GatherTree::prepareParams() { const auto& maxSeqLenMemPtr = getSrcMemoryAtPort(GATHER_TREE_MAX_SEQ_LEN); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!stepIdxMemPtr || !stepIdxMemPtr->isDefined()) + if (!stepIdxMemPtr || !stepIdxMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'step_ids'."); - if (!parentIdxMemPtr || !parentIdxMemPtr->isDefined()) + } + if (!parentIdxMemPtr || !parentIdxMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'parent_ids'."); - if (!maxSeqLenMemPtr || !maxSeqLenMemPtr->isDefined()) + } + if (!maxSeqLenMemPtr || !maxSeqLenMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'max_seq_len'."); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined output memory."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); + } const VectorDims& stepIdxDims = stepIdxMemPtr->getStaticDims(); const 
VectorDims& parentIdxDims = parentIdxMemPtr->getStaticDims(); @@ -155,8 +170,9 @@ void GatherTree::GatherTreeExecutor::exec(const MemoryPtr& stepIdxMemPtr, int32_t maxSequenceInBeam = std::min(maxTime, static_cast(maxSeqLen[batch])); if (maxSequenceInBeam > 0) { int32_t time, idx = (maxTime - 1) * bbSize + batch * beamWidth; - for (time = (maxTime - 1); time >= maxSequenceInBeam; time--, idx -= bbSize) + for (time = (maxTime - 1); time >= maxSequenceInBeam; time--, idx -= bbSize) { finalIdx[idx + beam] = endToken; + } for (int32_t parent = static_cast(beam); time >= 0; time--, idx -= bbSize) { if (parent < 0 || parent >= static_cast(beamWidth) || @@ -171,10 +187,11 @@ void GatherTree::GatherTreeExecutor::exec(const MemoryPtr& stepIdxMemPtr, bool finished = false; auto* final = &finalIdx[batch * beamWidth + beam]; for (time = 0; time < maxSequenceInBeam; time++, final += bbSize) { - if (finished) + if (finished) { (*final) = endToken; - else if ((*final) == endToken) + } else if ((*final) == endToken) { finished = true; + } } } }); diff --git a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp index 64965b4e61a138..c7e4466f88158e 100644 --- a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp +++ b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp @@ -154,12 +154,14 @@ void nms_cpu(const int num_boxes, #endif for (int box = 0; box < num_boxes; ++box) { - if (is_dead[box]) + if (is_dead[box]) { continue; + } index_out[count++] = base_index + box; - if (count == static_cast(max_num_out)) + if (count == static_cast(max_num_out)) { break; + } int tail = box + 1; @@ -246,8 +248,9 @@ void nms_cpu(const int num_boxes, res = area / (A_area + B_area - area); } - if (nms_thresh < res) + if (nms_thresh < res) { is_dead[tail] = 1; + } } } @@ -324,8 +327,9 @@ GenerateProposals::GenerateProposals(const std::shared_ptr& op, const } void GenerateProposals::initSupportedPrimitiveDescriptors() { - if 
(!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto roiNumPrecision = getOriginalOutputPrecisionAtPort(OUTPUT_ROI_NUM); addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, @@ -359,16 +363,18 @@ void GenerateProposals::execute(const dnnl::stream& strm) { for (size_t i = 1; i < deltaDims.size(); i++) { deltas_dims_size *= deltaDims[i]; } - if (anchor_dims_size != deltas_dims_size) + if (anchor_dims_size != deltas_dims_size) { OPENVINO_THROW("'Anchors' blob size for GenerateProposals is incompatible with 'deltas' blob size!"); + } size_t score_dims_size = 1; const auto& scoreDims = getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims(); for (size_t i = 1; i < scoreDims.size(); i++) { score_dims_size *= scoreDims[i]; } - if (deltas_dims_size != (4 * score_dims_size)) + if (deltas_dims_size != (4 * score_dims_size)) { OPENVINO_THROW("'Deltas' blob size for GenerateProposals is incompatible with 'scores' blob size!"); + } size_t im_info_dims_size = 1; const auto& infoDims = getParentEdgeAt(INPUT_IM_INFO)->getMemory().getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 7a8eb1088453c7..46732779df879f 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -44,18 +44,22 @@ GridSample::GridSample(const std::shared_ptr& op, const GraphContext:: THROW_CPU_NODE_ERR(errorMessage); } - if (op->get_input_size() != 2 || op->get_output_size() != 1) + if (op->get_input_size() != 2 || op->get_output_size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output ports."); + } const auto& dataShape = getInputShapeAtPort(IN_DATA); - if (dataShape.getRank() != 4) + if (dataShape.getRank() != 4) { THROW_CPU_NODE_ERR("has incorrect rank of the Data input."); + } const auto& gridShape = getInputShapeAtPort(IN_GRID); - if (gridShape.getRank() != 4) + if (gridShape.getRank() != 
4) { THROW_CPU_NODE_ERR("has incorrect rank of the Grid input."); - if (gridShape.isStatic() && gridShape.getDims()[3] != 2) + } + if (gridShape.isStatic() && gridShape.getDims()[3] != 2) { THROW_CPU_NODE_ERR("has incorrect shape of the Grid input. The 4th dimension should be equal to 2."); + } const auto& attributes = ov::as_type_ptr(op)->get_attributes(); alignCorners = attributes.align_corners; @@ -88,8 +92,9 @@ GridSample::GridSample(const std::shared_ptr& op, const GraphContext:: } void GridSample::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } dataPrecision = getOriginalInputPrecisionAtPort(IN_DATA); if (dataPrecision != ov::element::i32) { @@ -181,16 +186,20 @@ void GridSample::createPrimitive() { void GridSample::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(IN_DATA); - if (!dataMemPtr || !dataMemPtr->isDefined()) + if (!dataMemPtr || !dataMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input data memory."); + } auto gridMemPtr = getSrcMemoryAtPort(IN_GRID); - if (!gridMemPtr || !gridMemPtr->isDefined()) + if (!gridMemPtr || !gridMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input grid memory."); + } auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined output memory."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); + } const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); const auto& srcDataShape = dataMemPtr->getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 8a58ee5ef2cdcf..b4c9b6dacb6c80 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -34,23 +34,27 @@ 
GRN::GRN(const std::shared_ptr& op, const GraphContext::CPtr& context) } const auto grn = ov::as_type_ptr(op); - if (grn == nullptr) + if (grn == nullptr) { THROW_CPU_NODE_ERR("is not an instance of GRN from opset1."); + } - if (inputShapes.size() != 1 || outputShapes.size() != 1) + if (inputShapes.size() != 1 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } const auto dataRank = getInputShapeAtPort(0).getRank(); - if (dataRank != getOutputShapeAtPort(0).getRank()) + if (dataRank != getOutputShapeAtPort(0).getRank()) { THROW_CPU_NODE_ERR("has input/output rank mismatch"); + } bias = grn->get_bias(); } void GRN::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32, false, 0}}, {{LayoutType::ncsp, ov::element::f32, false, 0}}, @@ -61,29 +65,37 @@ void GRN::prepareParams() { const auto& dataMemPtr = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isDefined()) + if (!dataMemPtr || !dataMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory"); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined output memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor"); + } const VectorDims& dataDims = dataMemPtr->getStaticDims(); const VectorDims& dstDims = dstMemPtr->getStaticDims(); for (size_t i = 0; i < dataDims.size(); ++i) { - if (dataDims[i] != dstDims[i]) + if (dataDims[i] != dstDims[i]) { THROW_CPU_NODE_ERR("hsd input/output tensors dimensions mismatch"); + } } - if (dataDims.size() > 0) + if (dataDims.size() > 0) { N = static_cast(dataDims[0]); - if (dataDims.size() > 1) + } + if (dataDims.size() 
> 1) { C = static_cast(dataDims[1]); - if (dataDims.size() > 2) + } + if (dataDims.size() > 2) { H = static_cast(dataDims[2]); - if (dataDims.size() > 3) + } + if (dataDims.size() > 3) { W = static_cast(dataDims[3]); + } } void GRN::executeDynamicImpl(const dnnl::stream& strm) { diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index 683eb3c35a9b85..6793aebc4bb4d5 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -22,8 +22,9 @@ If::PortMapHelper::PortMapHelper(MemoryPtr from, std::deque to, const : srcMemPtr(std::move(from)), dstMemPtrs(std::move(to)), size(0) { - if (srcMemPtr->getDesc().isDefined()) + if (srcMemPtr->getDesc().isDefined()) { size = srcMemPtr->getShape().getElementsCount(); + } // Backup dstMemPtrs for (auto& ptr : dstMemPtrs) { @@ -158,8 +159,9 @@ void If::getSupportedDescriptors() { } void If::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } NodeConfig config; config.inConfs.reserve(getParentEdges().size()); @@ -239,8 +241,9 @@ void If::prepareAfterMappers(const bool isThen, const dnnl::engine& eng) { std::deque If::getToMemories(const Node* node, const size_t port) const { std::deque memories; - for (const auto& edge : node->getChildEdgesAtPort(port)) + for (const auto& edge : node->getChildEdgesAtPort(port)) { memories.push_back(edge->getMemoryPtr()); + } return memories; } @@ -251,12 +254,14 @@ void If::execute(const dnnl::stream& strm) { auto& afterMappers = condition ? afterThenMappers : afterElseMappers; auto& subGraph = condition ? 
subGraphThen : subGraphElse; - for (auto& mapper : beforeMappers) + for (auto& mapper : beforeMappers) { mapper->execute(strm); + } subGraph.ResetInferCount(); subGraph.Infer(); - for (auto& mapper : afterMappers) + for (auto& mapper : afterMappers) { mapper->execute(strm); + } } void If::executeDynamicImpl(const dnnl::stream& strm) { diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index f812da7ca01159..8b6b09a9ec4bd0 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -228,11 +228,12 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr& cont op::v0::Result::get_type_info_static(), op::v3::ReadValue::get_type_info_static(), op::v6::ReadValue::get_type_info_static(), - ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static())) + ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static())) { OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ", op->get_type_name(), " with name ", op->get_friendly_name()); + } if (auto constOp = ov::as_type_ptr(op)) { constant = ConstantType::Const; m_constOp = constOp; @@ -316,8 +317,9 @@ void Input::cloneBlobIfRequired() { if (prec == ov::element::f32) { uint32_t const* u32data = m_constOp->get_data_ptr(); - if (!size) + if (!size) { return false; + } #if defined(OPENVINO_ARCH_X86_64) if (auto fn = jit_has_subnormals_function()) { @@ -328,14 +330,16 @@ void Input::cloneBlobIfRequired() { parallel_for(iterations_num, [&](int n) { auto ptr = u32data + n * batch_size; - const jit_has_subnormals_base::args_t args = {reinterpret_cast(ptr), - std::min(batch_size, (size_t)(u32data + size - ptr)), - false}; + const jit_has_subnormals_base::args_t args = { + reinterpret_cast(ptr), + std::min(batch_size, static_cast(u32data + size - ptr)), + false}; fn(&args); - if (args.hasSubnormals) + if (args.hasSubnormals) { has_subnormals = true; + } }); return has_subnormals; @@ -377,26 +381,30 @@ 
void Input::cloneBlobIfRequired() { } static std::vector createInputShapes(const Shape& shape, const Type type) { - if (type == Type::Output) + if (type == Type::Output) { return {shape}; + } return {}; } static std::vector createOutputShapes(const Shape& shape, const Type type) { - if (type == Type::Input) + if (type == Type::Input) { return {shape}; + } return {}; } static std::vector createInputPrecisions(const ov::element::Type& prc, const Type type) { - if (type == Type::Output) + if (type == Type::Output) { return {prc}; + } return {}; } static std::vector createOutputPrecisions(const ov::element::Type& prc, const Type type) { - if (type == Type::Input) + if (type == Type::Input) { return {prc}; + } return {}; } @@ -447,21 +455,26 @@ MemoryCPtr Input::getMemoryPtr() const { void Input::getSupportedDescriptors() { if (getType() == Type::Input) { - if (!getParentEdges().empty()) + if (!getParentEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of input edges."); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } } else if (getType() == Type::Output) { - if (getParentEdges().size() != 1) + if (getParentEdges().size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input edges."); - if (!getChildEdges().empty()) + } + if (!getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } } } void Input::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } if (extMemDesc) { initSupportedPdFromMemDesc(); @@ -471,15 +484,17 @@ void Input::initSupportedPrimitiveDescriptors() { } void Input::initOptimalPrimitiveDescriptor() { - if (m_useParentMemoryDescForOutput || extMemDesc) + if (m_useParentMemoryDescForOutput || extMemDesc) { return; + } Node::initOptimalPrimitiveDescriptor(); } void Input::selectOptimalPrimitiveDescriptor() { - if 
(!(m_useParentMemoryDescForOutput && getType() == Type::Output)) + if (!(m_useParentMemoryDescForOutput && getType() == Type::Output)) { return Node::selectOptimalPrimitiveDescriptor(); + } // ignore previous configuration supportedPrimitiveDescriptors.clear(); @@ -498,26 +513,29 @@ void Input::selectOptimalPrimitiveDescriptor() { void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr) + if (!dstMemPtr) { THROW_CPU_NODE_ERR("has null memory object at port ", i, " to node ", getChildEdgeAt(i)->getChild()->getName(), "."); + } } for (size_t i = 0; i < getParentEdges().size(); i++) { auto srcMemPtr = getSrcMemoryAtPort(i); - if (!srcMemPtr) + if (!srcMemPtr) { THROW_CPU_NODE_ERR("has null memory object at port ", i, " from node ", getParentEdgeAt(i)->getParent()->getName(), "."); + } } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { THROW_CPU_NODE_ERR("doesn't have selected primitive descriptor."); + } } bool Input::created() const { diff --git a/src/plugins/intel_cpu/src/nodes/interaction.cpp b/src/plugins/intel_cpu/src/nodes/interaction.cpp index d1ffcb3546754a..b4b100d5dff3da 100644 --- a/src/plugins/intel_cpu/src/nodes/interaction.cpp +++ b/src/plugins/intel_cpu/src/nodes/interaction.cpp @@ -38,8 +38,9 @@ struct jit_move_scale_kernel : public jit_uni_move_scale_kernel, public jit_gene : jit_uni_move_scale_kernel(jcp), jit_generator(jit_name()) { runtime_prc = jcp_.src_prc == ov::element::bf16 ? 
ov::element::bf16 : ov::element::f32; - if (jcp_.dst_prc == ov::element::i8 || jcp_.dst_prc == ov::element::u8) + if (jcp_.dst_prc == ov::element::i8 || jcp_.dst_prc == ov::element::u8) { runtime_prc = ov::element::f32; + } vec_size = dnnl::impl::cpu::x64::cpu_isa_traits::vlen / runtime_prc.size(); } virtual ~jit_move_scale_kernel() {} @@ -96,8 +97,9 @@ struct jit_move_scale_kernel : public jit_uni_move_scale_kernel, public jit_gene this->postamble(); for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } @@ -194,8 +196,9 @@ Interaction::Interaction(const std::shared_ptr& op, const GraphContext } void Interaction::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } dataPrecision = getOriginalInputPrecisionAtPort(0); if (dataPrecision != ov::element::f32 && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) { dataPrecision = ov::element::bf16; diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index b50e50ac05fabd..67fe90226761ba 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -182,8 +182,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi this->postamble(); emit_emitters_data(); - for (auto& inj : eltwise_injectors) + for (auto& inj : eltwise_injectors) { inj->prepare_table(); + } if ((jcp_.mode == InterpolateMode::cubic) && (jcp_.layout == InterpolateLayoutType::planar)) { prepare_cubic_planar_table(); } @@ -283,8 +284,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi void emit_emitters_data() { for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } @@ -535,8 +537,9 @@ struct jit_uni_interpolate_kernel_f32 : public 
jit_uni_interpolate_kernel, publi uni_vmovdqu(vmm_index, ptr[reg_index]); uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); vgatherdps(vmm_val, ptr[reg_src_h + vmm_index], vmm_mask); - if (attr_.post_ops_.len() != 0) + if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_prc, 1); + } store(vmm_val, reg_dst, vector_step); add(reg_dst, vector_step * jcp_.dst_data_size); @@ -557,8 +560,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi add(reg_src_aux, reg_index_offset); load(reg_src_aux, vmm_val, scalar_step); - if (attr_.post_ops_.len() != 0) + if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_prc, 1); + } store(vmm_val, reg_dst, scalar_step); add(reg_dst, scalar_step * jcp_.dst_data_size); @@ -591,8 +595,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi add(reg_src_aux, reg_index_offset); load(reg_src_aux, vmm_val, vector_step); - if (attr_.post_ops_.len() != 0) + if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_prc, 0); + } store(vmm_val, reg_dst, vector_step); add(reg_dst, vector_step * jcp_.dst_data_size); @@ -651,8 +656,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi add(reg_src_aux, reg_index_offset); mov(reg_work_amount, reg_work_amount_bk); - if (attr_.post_ops_.len() != 0) + if (attr_.post_ops_.len() != 0) { mov(reg_oc_off, reg_oc_off_bk); + } L(nn_loop_label); { @@ -660,8 +666,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi jl(nn_loop_end_label, T_NEAR); load(reg_src_aux, vmm_val, vector_step); - if (attr_.post_ops_.len() != 0) + if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_prc, 0); + } store(vmm_val, reg_dst, vector_step); add(reg_dst, vector_step * jcp_.dst_data_size); @@ -675,8 +682,9 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi if (tail_step != 0) { load(reg_src_aux, vmm_val, tail_step); - if (attr_.post_ops_.len() != 0) + if 
(attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_prc, 0); + } store(vmm_val, reg_dst, tail_step); // check to remove below @@ -1658,35 +1666,49 @@ size_t InterpolateKey::hash() const { } bool InterpolateKey::operator==(const InterpolateKey& rhs) const { - if (nodeAttrs.mode != rhs.nodeAttrs.mode) + if (nodeAttrs.mode != rhs.nodeAttrs.mode) { return false; - if (nodeAttrs.coordTransMode != rhs.nodeAttrs.coordTransMode) + } + if (nodeAttrs.coordTransMode != rhs.nodeAttrs.coordTransMode) { return false; - if (nodeAttrs.nearestMode != rhs.nodeAttrs.nearestMode) + } + if (nodeAttrs.nearestMode != rhs.nodeAttrs.nearestMode) { return false; - if (nodeAttrs.layout != rhs.nodeAttrs.layout) + } + if (nodeAttrs.layout != rhs.nodeAttrs.layout) { return false; - if (nodeAttrs.antialias != rhs.nodeAttrs.antialias) + } + if (nodeAttrs.antialias != rhs.nodeAttrs.antialias) { return false; - if (nodeAttrs.cubeCoeff != rhs.nodeAttrs.cubeCoeff) + } + if (nodeAttrs.cubeCoeff != rhs.nodeAttrs.cubeCoeff) { return false; - if (nodeAttrs.padBegin != rhs.nodeAttrs.padBegin) + } + if (nodeAttrs.padBegin != rhs.nodeAttrs.padBegin) { return false; - if (nodeAttrs.padEnd != rhs.nodeAttrs.padEnd) + } + if (nodeAttrs.padEnd != rhs.nodeAttrs.padEnd) { return false; - if (nodeAttrs.inPrc != rhs.nodeAttrs.inPrc) + } + if (nodeAttrs.inPrc != rhs.nodeAttrs.inPrc) { return false; - if (nodeAttrs.outPrc != rhs.nodeAttrs.outPrc) + } + if (nodeAttrs.outPrc != rhs.nodeAttrs.outPrc) { return false; + } - if (srcDims != rhs.srcDims) + if (srcDims != rhs.srcDims) { return false; - if (dstDims != rhs.dstDims) + } + if (dstDims != rhs.dstDims) { return false; - if (dataScales != rhs.dataScales) + } + if (dataScales != rhs.dataScales) { return false; - if (!(*attr.get() == *rhs.attr.get())) + } + if (!(*attr.get() == *rhs.attr.get())) { return false; + } return true; } @@ -1881,10 +1903,12 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext if (const auto interp = 
ov::as_type_ptr(op)) { is_version11 = false; const auto numInputs = inputShapes.size(); - if (numInputs != 3 && numInputs != 4) + if (numInputs != 3 && numInputs != 4) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (outputShapes.size() != 1) + } + if (outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } isAxesSpecified = numInputs != 3; const auto& interpAttr = interp->get_attrs(); @@ -1954,16 +1978,18 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext interpAttrs.padBegin.resize(dataRank, 0); } else { interpAttrs.padBegin.resize(interpAttr.pads_begin.size()); - for (size_t i = 0; i < interpAttr.pads_begin.size(); i++) + for (size_t i = 0; i < interpAttr.pads_begin.size(); i++) { interpAttrs.padBegin[i] = static_cast(interpAttr.pads_begin[i]); + } } if (interpAttr.pads_end.empty()) { interpAttrs.padEnd.resize(dataRank, 0); } else { interpAttrs.padEnd.resize(interpAttr.pads_end.size()); - for (size_t i = 0; i < interpAttr.pads_end.size(); i++) + for (size_t i = 0; i < interpAttr.pads_end.size(); i++) { interpAttrs.padEnd[i] = static_cast(interpAttr.pads_end[i]); + } } const auto scalesNode = @@ -1985,10 +2011,12 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext } else if (const auto interp = ov::as_type_ptr(op)) { is_version11 = true; const auto numInputs = inputShapes.size(); - if (numInputs != 2 && numInputs != 3) + if (numInputs != 2 && numInputs != 3) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (outputShapes.size() != 1) + } + if (outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } isAxesSpecified = numInputs != 2; const auto& interpAttr = interp->get_attrs(); @@ -2025,16 +2053,18 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext interpAttrs.padBegin.resize(dataRank, 0); } else { interpAttrs.padBegin.resize(interpAttr.pads_begin.size()); - for (size_t i = 0; i < 
interpAttr.pads_begin.size(); i++) + for (size_t i = 0; i < interpAttr.pads_begin.size(); i++) { interpAttrs.padBegin[i] = static_cast(interpAttr.pads_begin[i]); + } } if (interpAttr.pads_end.empty()) { interpAttrs.padEnd.resize(dataRank, 0); } else { interpAttrs.padEnd.resize(interpAttr.pads_end.size()); - for (size_t i = 0; i < interpAttr.pads_end.size(); i++) + for (size_t i = 0; i < interpAttr.pads_end.size(); i++) { interpAttrs.padEnd[i] = static_cast(interpAttr.pads_end[i]); + } } if (isAxesSpecified) { @@ -2058,12 +2088,14 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext } void Interpolate::getSupportedDescriptors() { - if (getParentEdges().size() != 2 && getParentEdges().size() != 3 && getParentEdges().size() != 4) + if (getParentEdges().size() != 2 && getParentEdges().size() != 3 && getParentEdges().size() != 4) { // v4: data, target_shape, scale, axis(optional). // v11: data, size_or_scale, axis(optional) THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } // get pad for (size_t i = 0; i < interpAttrs.padBegin.size(); i++) { @@ -2102,8 +2134,9 @@ void Interpolate::getSupportedDescriptors() { } void Interpolate::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(DATA_ID); @@ -2116,8 +2149,9 @@ void Interpolate::initSupportedPrimitiveDescriptors() { inputPrecision = ov::element::f32; } - if (!hasHardwareSupport(inputPrecision)) + if (!hasHardwareSupport(inputPrecision)) { inputPrecision = ov::element::f32; + } // support input with rank<=3 only with float precision and planar layout. // Jit for avx2(gather is available) and ref for no-avx2 machine. 
@@ -2173,9 +2207,10 @@ void Interpolate::initSupportedPrimitiveDescriptors() { ->createSharedDesc(scalesType, getInputShapeAtPort(SIZE_OR_SCALE_ID_V11))); } - if (isAxesSpecified) + if (isAxesSpecified) { config.inConfs[AXES_ID_V11].setMemDesc( creatorsMap.at(LayoutType::ncsp)->createSharedDesc(axesType, getInputShapeAtPort(AXES_ID_V11))); + } } else { config.inConfs[TARGET_SHAPE_ID].setMemDesc( creatorsMap.at(LayoutType::ncsp) @@ -2183,9 +2218,10 @@ void Interpolate::initSupportedPrimitiveDescriptors() { config.inConfs[get_scale_id()].setMemDesc( creatorsMap.at(LayoutType::ncsp)->createSharedDesc(scalesType, getInputShapeAtPort(get_scale_id()))); - if (isAxesSpecified) + if (isAxesSpecified) { config.inConfs[get_axis_id()].setMemDesc( creatorsMap.at(LayoutType::ncsp)->createSharedDesc(axesType, getInputShapeAtPort(get_axis_id()))); + } } config.outConfs[0].setMemDesc( @@ -2227,20 +2263,23 @@ void Interpolate::initSupportedPrimitiveDescriptors() { if (dataRank == 4) { if (mayiuse(cpu::x64::avx512_core)) { - if (NCHWAsNHWC) + if (NCHWAsNHWC) { pushDesc(LayoutType::ncsp, jit_avx512, true); - else + } else { pushDesc(LayoutType::nspc, jit_avx512, true); + } } else if (mayiuse(cpu::x64::avx2)) { - if (NCHWAsNHWC) + if (NCHWAsNHWC) { pushDesc(LayoutType::ncsp, jit_avx2, true); - else + } else { pushDesc(LayoutType::nspc, jit_avx2, true); + } } else if (mayiuse(cpu::x64::sse41)) { - if (NCHWAsNHWC) + if (NCHWAsNHWC) { pushDesc(LayoutType::ncsp, jit_sse42, true); - else + } else { pushDesc(LayoutType::nspc, jit_sse42, true); + } } } pushDesc(LayoutType::ncsp, ref, true); @@ -2266,16 +2305,19 @@ void Interpolate::initSupportedPrimitiveDescriptors() { if (dataRank == 4 || (dataRank == 5 && interpAttrs.mode != InterpolateMode::cubic)) { if (mayiuse(cpu::x64::avx512_core)) { pushDesc(LayoutType::nspc, jit_avx512, false); - if (isBlkApplied) + if (isBlkApplied) { pushDesc(LayoutType::nCsp16c, jit_avx512, false); + } } else if (mayiuse(cpu::x64::avx2)) { 
pushDesc(LayoutType::nspc, jit_avx2, false); - if (isBlkApplied) + if (isBlkApplied) { pushDesc(LayoutType::nCsp8c, jit_avx2, false); + } } else { pushDesc(LayoutType::nspc, jit_sse42, false); - if (isBlkApplied) + if (isBlkApplied) { pushDesc(LayoutType::nCsp8c, jit_sse42, false); + } } } @@ -2283,10 +2325,11 @@ void Interpolate::initSupportedPrimitiveDescriptors() { // 1.ref on machine w/o avx2(no fuse) // 2.JIT kernel for avx2(gatherps is available).(with fuse) if (inputPrecision == ov::element::f32) { - if (mayiuse(cpu::x64::avx2)) + if (mayiuse(cpu::x64::avx2)) { pushDesc(LayoutType::ncsp, jit_avx2, false); - else + } else { pushDesc(LayoutType::ncsp, ref, false); + } } } } @@ -2339,16 +2382,18 @@ bool Interpolate::needPrepareParams() const { } inline int Interpolate::get_scale_id() const { - if (is_version11) + if (is_version11) { return SIZE_OR_SCALE_ID_V11; - else + } else { return SCALES_ID; + } } inline int Interpolate::get_axis_id() const { - if (is_version11) + if (is_version11) { return AXES_ID_V11; - else + } else { return AXES_ID; + } } void Interpolate::prepareParams() { @@ -2359,32 +2404,38 @@ void Interpolate::prepareParams() { } auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined destination memory"); + } auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); - if (!srcMemPtr || !srcMemPtr->isDefined()) + if (!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory"); + } if (interpAttrs.shapeCalcMode == InterpolateShapeCalcMode::sizes) { auto tsMemPtr = getSrcMemoryAtPort(TARGET_SHAPE_ID); - if (!tsMemPtr || !tsMemPtr->isDefined()) + if (!tsMemPtr || !tsMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined target shape memory"); + } } else { auto scaleMemPtr = getSrcMemoryAtPort(get_scale_id()); - if (!scaleMemPtr || !scaleMemPtr->isDefined()) + if (!scaleMemPtr || !scaleMemPtr->isDefined()) { 
THROW_CPU_NODE_ERR("has undefined scales memory"); + } } if (isAxesSpecified) { auto axesMemPtr = getSrcMemoryAtPort(get_axis_id()); - if (!axesMemPtr || !axesMemPtr->isDefined()) + if (!axesMemPtr || !axesMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined axes memory"); + } } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { THROW_CPU_NODE_ERR("did not set preferable primitive descriptor"); + } const auto& srcDimsOrign = srcMemPtr->getStaticDims(); const auto& dstDimsOrign = dstMemPtr->getStaticDims(); @@ -2478,10 +2529,12 @@ void Interpolate::prepareParams() { void Interpolate::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr) + if (!srcMemPtr) { THROW_CPU_NODE_ERR("has null input memory"); - if (!dstMemPtr) + } + if (!dstMemPtr) { THROW_CPU_NODE_ERR("has null destination memory"); + } if (dstMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { interpAttrs.layout = InterpolateLayoutType::planar; @@ -2496,8 +2549,9 @@ void Interpolate::createPrimitive() { interpAttrs.outPrc = dstMemPtr->getDesc().getPrecision(); if (shapesDefined() && isExecutable()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -3088,10 +3142,11 @@ float Interpolate::InterpolateExecutorBase::coordTransToInput(int outCoord, break; } case InterpolateCoordTransMode::pytorch_half_pixel: { - if (outShape > 1) + if (outShape > 1) { return (outCoord + 0.5f) / scale - 0.5f; - else + } else { return 0; + } break; } case InterpolateCoordTransMode::asymmetric: { @@ -3103,10 +3158,11 @@ float Interpolate::InterpolateExecutorBase::coordTransToInput(int outCoord, break; } case InterpolateCoordTransMode::align_corners: { - if (outShape > 1) + if (outShape > 1) { return outCoord * (static_cast(inShape - 1) / static_cast(outShape - 1)); - else + } else { return 0; + } break; } default: { @@ 
-3121,10 +3177,11 @@ int Interpolate::InterpolateExecutorBase::nearestRound(float originCoord, InterpolateNearestMode nearestMode) const { switch (nearestMode) { case InterpolateNearestMode::round_prefer_floor: { - if (originCoord == (static_cast(originCoord) + 0.5f)) + if (originCoord == (static_cast(originCoord) + 0.5f)) { return static_cast(std::floor(originCoord)); - else + } else { return static_cast(std::round(originCoord)); + } break; } case InterpolateNearestMode::round_prefer_ceil: { @@ -3140,10 +3197,11 @@ int Interpolate::InterpolateExecutorBase::nearestRound(float originCoord, break; } case InterpolateNearestMode::simple: { - if (isDownsample) + if (isDownsample) { return static_cast(std::ceil(originCoord)); - else + } else { return static_cast(originCoord); + } } default: { OPENVINO_THROW("does not support specified nearest round mode"); @@ -3451,21 +3509,26 @@ void Interpolate::InterpolateExecutorBase::buildTblCubic(const VectorDims& srcDi } float Interpolate::InterpolateExecutorBase::getPillowBilinearCoeffs(float m) { - if (m < 0.0f) + if (m < 0.0f) { m = -m; - if (m < 1.0) + } + if (m < 1.0) { return 1.0f - m; + } return 0.0f; } float Interpolate::InterpolateExecutorBase::getPillowBicubicCoeffs(float m) { float a = -0.5f; - if (m < 0.0f) + if (m < 0.0f) { m = -m; - if (m < 1.0) + } + if (m < 1.0) { return ((a + 2.0) * m - (a + 3.0)) * m * m + 1.0; - if (m < 2.0f) + } + if (m < 2.0f) { return (((m - 5) * m + 8) * m - 4) * a; + } return 0.0f; } @@ -3554,8 +3617,9 @@ void Interpolate::InterpolateExecutorBase::buildTblPillow(const VectorDims& srcD } // filterlen is maximum possible len, set others to 0 for possible uniform process(vector) - for (; ix < args.filterLen; ix++) + for (; ix < args.filterLen; ix++) { weightTbl[offset + ix] = 0.f; + } } }; @@ -3887,8 +3951,9 @@ void Interpolate::InterpolateRefExecutor::linearInterpolation(const uint8_t* in_ //} for (int iz = 0; iz < diaOD; iz++) { - if (weightOD[oz * diaOD + iz] == 0.f) + if (weightOD[oz * 
diaOD + iz] == 0.f) { continue; + } for (int iy = 0; iy < diaOH; iy++) { if (weightOH[oy * diaOH + iy] == 0.f) { continue; @@ -4032,8 +4097,9 @@ void Interpolate::InterpolateRefExecutor::pillowRef(const uint8_t* in_ptr_, } void Interpolate::InterpolateExecutorBase::create_pillow_working_buf(InterpolateLayoutType layout) { - if (srcDimPad5d[3] == dstDim5d[3] || srcDimPad5d[4] == dstDim5d[4]) + if (srcDimPad5d[3] == dstDim5d[3] || srcDimPad5d[4] == dstDim5d[4]) { return; + } size_t bufSize = srcDimPad5d[3] * dstDim5d[4] * srcDataSize; // IH * OW m_threads_num = parallel_get_max_threads(); if (layout == InterpolateLayoutType::planar) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/brgemm_kernel.cpp index 4185eafc8880cd..56bdb543087fee 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/brgemm_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/brgemm_kernel.cpp @@ -74,8 +74,9 @@ BrgemmKernel::BrgemmKernel(size_t M, // don't create brgemm kernels for empty tiles if (M_ != 0 && K_ != 0 && N_ != 0) { - if (brg0BaseIdx == std::numeric_limits::max()) + if (brg0BaseIdx == std::numeric_limits::max()) { brg0BaseIdx = getBrgIdx(m, k, n); + } init_brgemm(brgemmCtx, brgKernels[getBrgIdx(m, k, n)]); } } @@ -214,8 +215,9 @@ void BrgemmKernel::init_brgemm_copy_b( brgCopyKernelConf.has_zero_point_b = false; brgCopyKernelConf.src_zp_type = dnnl::impl::cpu::aarch64::none; auto ret = create_brgemm_matmul_copy_b(brgCopyKernel, &brgCopyKernelConf); - if (ret != dnnl::impl::status_t::dnnl_success) + if (ret != dnnl::impl::status_t::dnnl_success) { THROW_ERROR("cannot create_brgemm_matmul_copy_b kernel"); + } } void BrgemmKernel::copy_buffer_b(void* b, void* scratch_b) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 66db416ec7c732..bbd0089a53f14d 100644 --- 
a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -126,27 +126,33 @@ void jit_uni_eltwise_generic::generate() { size_t min_src_size = jep.dst_size; for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) + if (jep.src_size[i] != 1) { min_src_size = std::min(min_src_size, jep.src_size[i]); + } } - if (jep_.oc_size > 1) + if (jep_.oc_size > 1) { min_src_size = std::min(min_src_size, jep_.oc_size); + } if (min_src_size != jep.dst_size) { bool is_valid_configuration = true; - if (jep.dst_size % min_src_size != 0) + if (jep.dst_size % min_src_size != 0) { is_valid_configuration = false; + } for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1 && jep.src_size[i] != min_src_size && jep.src_size[i] != jep.dst_size) + if (jep.src_size[i] != 1 && jep.src_size[i] != min_src_size && jep.src_size[i] != jep.dst_size) { is_valid_configuration = false; + } } - if (jep.oc_size > 1 && jep.oc_size != min_src_size && jep.oc_size != jep.dst_size) + if (jep.oc_size > 1 && jep.oc_size != min_src_size && jep.oc_size != jep.dst_size) { is_valid_configuration = false; + } - if (!is_valid_configuration) + if (!is_valid_configuration) { OPENVINO_THROW("Eltwise jitter has invalid configuration for Eltwise node"); + } L(unroll_loop_label); { @@ -195,14 +201,17 @@ void jit_uni_eltwise_generic::generate() { store_scalar(reg_dst, sc_dst_reg, exec_prc, jep.dst_prc, j * jep.dst_prc.size()); } - for (size_t i = 0; i < jep.inputs_number; i++) - if (jep.src_size[i] == jep.dst_size) + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] == jep.dst_size) { add(get_src_reg(i), get_src_reg(i), jep.src_prc[i].size() * loop_step); + } + } add(reg_dst, reg_dst, jep.dst_prc.size() * loop_step); sub(reg_work_amount, reg_work_amount, loop_step); - if (jep_.oc_size > 1 && jep_.oc_size != min_src_size) + if (jep_.oc_size > 1 && jep_.oc_size 
!= min_src_size) { add(reg_oc_off, reg_oc_off, loop_step * sizeof(float)); + } b(AL, unroll_loop_label); } @@ -240,8 +249,9 @@ void jit_uni_eltwise_generic::generate() { add(reg_dst, reg_dst, jep.dst_prc.size() * loop_step); sub(reg_work_amount, reg_work_amount, loop_step); - if (jep_.oc_size > 1) + if (jep_.oc_size > 1) { add(reg_oc_off, reg_oc_off, loop_step * sizeof(float)); + } b(AL, main_loop_label); } @@ -276,8 +286,9 @@ void jit_uni_eltwise_generic::generate() { add(reg_dst, reg_dst, jep.dst_prc.size() * loop_step); sub(reg_work_amount, reg_work_amount, loop_step); - if (jep_.oc_size > 1) + if (jep_.oc_size > 1) { add(reg_oc_off, reg_oc_off, loop_step * sizeof(float)); + } b(AL, tail_loop_label); } @@ -699,8 +710,9 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseSwish, ov::intel_cpu::aarch64::jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, ov::intel_cpu::aarch64::jit_tanh_emitter)); - if (!ctx.emitter) + if (!ctx.emitter) { OPENVINO_THROW("Unsupported operation type '" + algToString(data.algo) + "' for Eltwise emitter"); + } return ctx.emitter; } @@ -736,19 +748,22 @@ void jit_uni_eltwise_generic::apply_post_ops() { if (ops_list_[i] == ov::intel_cpu::Type::Eltwise) { std::vector in_idxs; in_idxs.push_back(vmm_dst.getIdx()); - for (size_t j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_count(); j++) + for (size_t j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_count(); j++) { in_idxs.push_back(get_vmm_reg(input_idx++).getIdx()); + } std::vector out_idxs; out_idxs.push_back(vmm_dst.getIdx()); std::vector aux_vmm_idxs; - for (size_t j = 0; j < post_op_emitters[eltwise_post_op_idx]->get_aux_vecs_count(); j++) + for (size_t j = 0; j < post_op_emitters[eltwise_post_op_idx]->get_aux_vecs_count(); j++) { aux_vmm_idxs.push_back(get_aux_vmm(j).getIdx()); + } std::vector aux_gpr_idxs; - for (size_t j = 0; j < post_op_emitters[eltwise_post_op_idx]->get_aux_gprs_count(); j++) + for (size_t j = 0; j 
< post_op_emitters[eltwise_post_op_idx]->get_aux_gprs_count(); j++) { aux_gpr_idxs.push_back(get_aux_gpr(j).getIdx()); + } post_op_emitters[eltwise_post_op_idx]->emit_code(in_idxs, out_idxs, aux_vmm_idxs, aux_gpr_idxs); @@ -885,8 +900,9 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), OV_CASE(Algorithm::EltwiseSwish, jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, jit_tanh_emitter)); - if (precisions.empty()) + if (precisions.empty()) { OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); + } return precisions; } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/acl/gemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/acl/gemm_kernel.cpp index ce1d8a828613c0..26d31b69be90d7 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/acl/gemm_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/acl/gemm_kernel.cpp @@ -11,8 +11,9 @@ GemmKernel::GemmKernel(size_t M, size_t N, size_t K, bool b_transposed, ov::elem N(N), K(K), b_transposed(b_transposed) { - if (!one_of(inType, ov::element::f32, ov::element::f16, ov::element::bf16)) + if (!one_of(inType, ov::element::f32, ov::element::f16, ov::element::bf16)) { THROW_ERROR("brgemm kernel only supports bf16, f16 and f32"); + } if (inType == ov::element::f32) { format = arm_compute::Format::F32; @@ -43,10 +44,11 @@ arm_compute::Status GemmKernel::executeGemm(void* a, (size_t)(M * N * arm_compute::element_size_from_data_type(arm_compute::data_type_from_format(format)))); arm_compute::TensorShape bShape; - if (b_transposed) + if (b_transposed) { bShape = shapeCast({K, N}); - else + } else { bShape = shapeCast({N, K}); + } bInfo.init(bShape, format, @@ -78,10 +80,11 @@ arm_compute::Status GemmKernel::executeGemm(void* a, bTensor.allocator()->import_memory(reinterpret_cast(b)); cTensor.allocator()->import_memory(reinterpret_cast(c)); - if (out == nullptr) + if (out == nullptr) { dstTensor.allocator()->allocate(); - else + } else { 
dstTensor.allocator()->import_memory(out); + } if (b_transposed) aclGemmInfo.set_pretranspose_B(true); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp index 0f6a832225c548..cda19a12b634bd 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp @@ -83,8 +83,9 @@ static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, size_t block_size = past_k_output.m_dims[2]; parallel_for3d(B, L1, H, [&](size_t b, size_t m, size_t h) { auto slot = slot_mapping.ptr(b)[m]; - if (slot < 0) + if (slot < 0) { return; + } auto block_number = slot / block_size; auto block_offset = slot % block_size; attn_copy(past_k_output.ptr(block_number, h, block_offset, 0), k_input.ptr(b, h, m, 0), S); @@ -102,8 +103,9 @@ static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, size_t block_size = past_k_output.m_dims[2]; parallel_for3d(B, L1, H, [&](size_t b, size_t m, size_t h) { auto slot = slot_mapping.ptr(b)[m]; - if (slot < 0) + if (slot < 0) { return; + } auto block_number = slot / block_size; auto block_offset = slot % block_size; std::memcpy(past_k_output.ptr_v(block_number, h, block_offset, 0), diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp index 111dbd17db4537..331da54e7d1342 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp @@ -140,8 +140,9 @@ static void quant_u8(const T* src, uint8_t* dst, size_t n, float& scale, float& float min = FLT_MAX; find_minmax(src, n, min, max); scale = (max - min) / 255; - if (scale == 0) + if (scale == 0) { scale = 0.0001f; + } zp = -min / scale; #if defined(HAVE_AVX512F) auto v_scale = _mm512_set1_ps(1 / scale); 
@@ -184,12 +185,13 @@ static void quant_u4(const T* src, void* dst, size_t n, float& scale, float& zp) find_minmax(src, n, min, max); auto insert_half_byte = [](uint8_t dst, uint8_t val, bool high_half) -> uint8_t { uint8_t shift = high_half ? 0 : 4; - return dst | (uint8_t)(val << shift); + return dst | static_cast(val << shift); }; auto dst_ptr = reinterpret_cast(dst); scale = (max - min) / ((1 << 4) - 1); - if (scale == 0) + if (scale == 0) { scale = 0.0001f; + } zp = -min / scale; #if defined(HAVE_AVX512F) auto v_scale = _mm512_set1_ps(1 / scale); @@ -267,7 +269,7 @@ static void quant_u4(const T* src, void* dst, size_t n, float& scale, float& zp) #define MIN(a, b) ((a) < (b) ? (a) : (b)) uint8_t src_val = MIN(15, (uint8_t)(std::round(tmp / scale + zp))); uint8_t dst_val = i % 2 == 0 ? 0 : dst_ptr[i / 2]; - dst_val = insert_half_byte(dst_val, src_val, (uint8_t)(i % 2)); + dst_val = insert_half_byte(dst_val, src_val, static_cast(i % 2)); dst_ptr[i / 2] = dst_val; } } @@ -316,8 +318,9 @@ static void paged_attn_quant_mt(const ov::intel_cpu::PlainTensor& k_src, size_t sub_byte_multiplier = 8 / v_dst.get_precision().bitwidth(); parallel_for3d(B, L1, H, [&](size_t b, size_t m, size_t h) { auto slot = slot_mapping.ptr(b)[m]; - if (slot < 0) + if (slot < 0) { return; + } auto block_number = slot / block_size; auto block_offset = slot % block_size; // The layout for per token per head: diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant_kernel.hpp index c95976c2a3bb34..b00929a848b65e 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant_kernel.hpp @@ -132,10 +132,10 @@ void attn_dequant_kernel(const uint8_t* src, TDST* dst, size_t n, float scale, f #endif auto extract_half_byte = [&](uint8_t val, bool high_half) -> uint8_t { uint8_t shift = high_half ? 
0 : 4; - return (uint8_t)((val >> shift) & 0x000F); + return static_cast((val >> shift) & 0x000F); }; for (; i < n; ++i) { - float tmp = extract_half_byte(src_nc[i / 2], (uint8_t)(i % 2)); + float tmp = extract_half_byte(src_nc[i / 2], static_cast(i % 2)); tmp = (tmp - zp) * scale; dst[i] = tmp; } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp index 1028695ee11f45..81ccd2c77602c0 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp @@ -386,7 +386,7 @@ static void attn_acc_value_block(float* out, auto extract_half_byte = [](uint8_t val, bool high_half) -> uint8_t { uint8_t shift = high_half ? 0 : 4; - return (uint8_t)((val >> shift) & 0x000F); + return static_cast((val >> shift) & 0x000F); }; for (size_t j = 0; j < block_size; j++) { dst_offset = 0; @@ -1376,8 +1376,9 @@ struct MHAHelper { if (init_alibi_lookup && (!_alibi_lookup || _alibi_lookup.m_dims[0] < kv_len)) { _alibi_lookup.resize({kv_len * 2}); - for (size_t i = 0; i < _alibi_lookup.m_dims[0]; i++) + for (size_t i = 0; i < _alibi_lookup.m_dims[0]; i++) { _alibi_lookup.ptr()[i] = -static_cast((_alibi_lookup.m_dims[0] - 1 - i)); + } } if (init_rotation_coefficient_scratch) { @@ -1656,9 +1657,11 @@ struct MHAHelper { } } // convert to dst - for (size_t pq = 0; pq < q_len; pq++) - for (size_t h = hq_beg; h < hq_end; h++) + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = hq_beg; h < hq_end; h++) { cvt_copy(output_emb.ptr(pq, h * _SV), _output.ptr(ithr, pq, h), _SV); + } + } } // compute one token, loop along batch, head dimensions and kv_len, it's special for very long kv_len with small @@ -1696,8 +1699,9 @@ struct MHAHelper { // small batch and all batch size is same(like SDPA case) auto kv_len = past_lens.ptr()[0]; for (size_t b = 1; b < B; b++) { - if (past_lens.ptr()[b] != kv_len) + if 
(past_lens.ptr()[b] != kv_len) { prefer_static_loop = false; + } } } else { // for bigger batch skip the test to save the cost @@ -1982,8 +1986,9 @@ struct MHA { const auto kv_block = item.kv_block_id; auto block_number = block_indices.ptr()[block_indices_begins.ptr()[batch_in_seq] + kv_block]; - if (block_number < 0) + if (block_number < 0) { return; + } auto ithr = parallel_get_thread_num(); auto* k_ptr = k_cache.ptr(block_number, hk); @@ -2142,8 +2147,9 @@ struct MHA { const PlainTensor& block_indices_begins, const PlainTensor& alibi_slopes) { _workitems.reset(query, past_lens, subsequence_begins, _helper._block_size); - if (output_score) + if (output_score) { _helper.init_score_buffers(past_lens, subsequence_begins); + } auto nthr = static_cast(parallel_get_max_threads()); @@ -2218,25 +2224,30 @@ struct AttentionExecutor : public PagedAttentionExecutor { block_indices_begins.reset(inputs[ID_BLOCK_INDICES_BEGINS]); // [B_seq+1] scale = *inputs[ID_SCALE]->getDataAs(); sliding_window = static_cast(*inputs[ID_SLIDING_WINDOW]->getDataAs()); - if (!inputs[ID_ALIBI_SLOPES]->getShape().hasZeroDims()) + if (!inputs[ID_ALIBI_SLOPES]->getShape().hasZeroDims()) { alibi_slopes.reset(inputs[ID_ALIBI_SLOPES]); + } max_context_len = static_cast(*inputs[ID_MAX_CONTEXT_LEN]->getDataAs()); size_t inputs_size = inputs.size(); if (inputs_size > ID_ROTATED_BLOCK_INDICES) { OPENVINO_ASSERT(inputs_size >= ID_ROTATION_TRIG_LUT); - if (!inputs[ID_ROTATED_BLOCK_INDICES]->getShape().hasZeroDims()) + if (!inputs[ID_ROTATED_BLOCK_INDICES]->getShape().hasZeroDims()) { rotated_block_indices.reset(inputs[ID_ROTATED_BLOCK_INDICES]); // [num_blocks] - if (!inputs[ID_ROTATION_DELTAS]->getShape().hasZeroDims()) + } + if (!inputs[ID_ROTATION_DELTAS]->getShape().hasZeroDims()) { rotation_deltas.reset(inputs[ID_ROTATION_DELTAS]); // [num_blocks, block_size (32) || 1] - if (!inputs[ID_ROTATION_TRIG_LUT]->getShape().hasZeroDims()) + } + if (!inputs[ID_ROTATION_TRIG_LUT]->getShape().hasZeroDims()) { 
rotation_trig_lut.reset( inputs[ID_ROTATION_TRIG_LUT]); // [max_context_len * embedding_size], row-major layout + } } output_emb.reset(outputs[0]); - if (outputs.size() == 2) + if (outputs.size() == 2) { output_score.reset(outputs[1]); + } auto B_token = q.size(0); auto Hk = k_cache.size(1); @@ -2285,8 +2296,9 @@ struct AttentionExecutor : public PagedAttentionExecutor { subsequence_begins.assert_dims({B_seq + 1}); block_indices.assert_dims({0}, true); block_indices_begins.assert_dims({B_seq + 1}); - if (scale == 0.0f) + if (scale == 0.0f) { scale = 1.0f / sqrt(S); + } if (alibi_slopes) { alibi_slopes.assert_dims({H}); } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp index bc9438b22488d3..97c10a34d60119 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp @@ -30,7 +30,7 @@ using namespace dnnl::impl::cpu::x64; void TileConfig::reset(int palette, int _startRow, const std::vector>& _rows_columnsBytes) { palette_id = palette; startRow = _startRow; - unsigned long i; + uint64_t i; for (i = 0; i < 14; i++) { reserved[i] = 0; } @@ -83,13 +83,16 @@ void JitMatMulVecAMX::generate() { mov(reg_stride_A, m_head_size * 2); mov(reg_stride_BC, 4); const int kStep = 32; - if ((m_head_size % 32) != 0) + if ((m_head_size % 32) != 0) { throw std::runtime_error("head size is not multiple of 32"); - if ((m_block_size % 16) != 0) + } + if ((m_block_size % 16) != 0) { throw std::runtime_error("block size is not multiple of 16"); + } auto num_B_tiles = m_head_size / kStep; - if (num_B_tiles > 6) + if (num_B_tiles > 6) { throw std::runtime_error("number of B tiles is bigger than 6"); + } /* B(query) head_size x 1 diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp 
b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index 6b20b996e5c501..bb4808525a14d6 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -1170,8 +1170,9 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, if (h_group_num != H) { h_each_group_len = H / h_group_num; } - if (d_scale == 0.0f) + if (d_scale == 0.0f) { d_scale = 1.0f / sqrt(S); + } auto nthr = parallel_get_max_threads(); auto kv_len = present_key.size(2); @@ -1290,8 +1291,9 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, T3* alibi_ptr = alibi_mask ? &alibi_mask.at({b, h, pq, 0}, true) : nullptr; uint8_t* attn_mask_ptr = nullptr; auto attn_mask_prec = attention_mask.get_precision(); - if (attention_mask) + if (attention_mask) { attn_mask_ptr = reinterpret_cast(&attention_mask.at({b, h, pq, 0}, true)); + } uint8_t* cmask_ptr = causal_mask ? 
&causal_mask.at({b, h, pq, 0}, true) : nullptr; attn_softmax_kernel(buf_attn_w.ptr(b, h, pq), buf_attn_w.ptr(b, h, pq), diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp index 67cf6137bb8b45..afdcdec82c4291 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp @@ -439,16 +439,19 @@ inline void scale_add2_reduce_max(float* a, a[i] += alibi_lookup[i] * alibi_slope; } - if (has_attn_mask) + if (has_attn_mask) { a[i] += attn_mask[i]; + } if (has_causal_mask) { if (select_nfltmax_at_0) { - if (causal_mask[i] == 0) + if (causal_mask[i] == 0) { a[i] = -FLT_MAX; + } } else { - if (causal_mask[i] != 0) + if (causal_mask[i] != 0) { a[i] = -FLT_MAX; + } } } @@ -1148,18 +1151,21 @@ inline void attn_softmax_kernel(float* a, if (dst_precision == ov::element::f32) { multiply_scalar(a, reinterpret_cast(a_dst), scalar, len); // apply causual mask to final result instead of attn_score - if (total_size > len) + if (total_size > len) { memset(static_cast(a_dst) + len, 0, sizeof(float) * (total_size - len)); + } } else if (dst_precision == ov::element::bf16) { multiply_scalar(a, static_cast(a_dst), scalar, len); // apply causual mask to final result instead of attn_score - if (total_size > len) + if (total_size > len) { memset(static_cast(a_dst) + len, 0, sizeof(ov::bfloat16) * (total_size - len)); + } } else { multiply_scalar(a, static_cast(a_dst), scalar, len); // apply causual mask to final result instead of attn_score - if (total_size > len) + if (total_size > len) { memset(static_cast(a_dst) + len, 0, sizeof(ov::float16) * (total_size - len)); + } } } #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp index 
5defe98c55aad8..4ed540f0b0af8c 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp @@ -38,17 +38,20 @@ BrgemmKernel::BrgemmKernel(size_t M, b_transposed(b_transposed), inType(inType), b_accumulate(b_accumulate) { - if (!one_of(inType, ov::element::bf16, ov::element::f16, ov::element::f32)) + if (!one_of(inType, ov::element::bf16, ov::element::f16, ov::element::f32)) { THROW_ERROR("brgemm kernel only supports f16, bf16, f32"); + } bool is_f32 = inType == ov::element::f32; bool is_bf16 = inType == ov::element::bf16; - if (is_bf16 && !mayiuse(avx512_core_bf16)) + if (is_bf16 && !mayiuse(avx512_core_bf16)) { THROW_ERROR("brgemm bf16 kernel could only be used above avx512_bf16"); + } bool is_f16 = inType == ov::element::f16; - if (is_f16 && !mayiuse(avx512_core_fp16)) + if (is_f16 && !mayiuse(avx512_core_fp16)) { THROW_ERROR("brgemm f16 kernel could only be used above avx512_f16"); + } srcType = weiType = inType; // If isa is avx512_core_fp16, f16 is supported by upconverted to f32 @@ -69,10 +72,11 @@ BrgemmKernel::BrgemmKernel(size_t M, bool isBrgWithAMX = isAMXSupported && !is_avx_f16_only; size_t vlen; - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { vlen = cpu_isa_traits::vlen; - else + } else { vlen = cpu_isa_traits::vlen; + } // blocking N N_blk = !is_f32 ? 
32 : std::max(N, vlen / inType.size()); N_tail = N % N_blk; @@ -109,8 +113,9 @@ BrgemmKernel::BrgemmKernel(size_t M, // don't create brgemm kernels for empty tiles if (M_ != 0 && K_ != 0 && N_ != 0) { - if (brg0BaseIdx == std::numeric_limits::max()) + if (brg0BaseIdx == std::numeric_limits::max()) { brg0BaseIdx = getBrgIdx(m, k, n); + } init_brgemm(brgemmCtx, brgKernels[getBrgIdx(m, k, n)], isBrgWithAMX); } } @@ -329,8 +334,9 @@ void BrgemmKernel::init_brgemm_copy_b( brgCopyKernelConf.has_zero_point_b = false; brgCopyKernelConf.src_zp_type = dnnl::impl::cpu::x64::none; auto ret = create_brgemm_matmul_copy_b(brgCopyKernel, &brgCopyKernelConf); - if (ret != dnnl::impl::status_t::dnnl_success) + if (ret != dnnl::impl::status_t::dnnl_success) { THROW_ERROR("cannot create_brgemm_matmul_copy_b kernel"); + } } void BrgemmKernel::copy_buffer_b(void* b, void* scratch_b) { @@ -432,8 +438,9 @@ void BrgemmKernel::callBrgemm(brgemmCtx& ctx, const void* pin1, void* pout, void* wsp) { - if (ctx.is_with_amx) + if (ctx.is_with_amx) { amx_tile_configure(ctx.palette); + } if (ctx.is_with_comp) { brgemm_post_ops_data_t post_ops_data; brgemm_kernel_execute_postops(brgKernel.get(), 1, pin0, pin1, nullptr, pout, pout, post_ops_data, wsp); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp index 5606d3902c6526..9f42741809c175 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp @@ -57,10 +57,11 @@ template jitUniGatherKernel::jitUniGatherKernel(const jGatherConfParams& jcp) : jitGatherKernelBase(jcp, x64::cpu_isa_traits::vlen, indicesTypeSize), x64::jit_generator(jit_name()) { - if (jcp.dataTypeSize == 2) + if (jcp.dataTypeSize == 2) { dataTypeShift = 1; - else if (jcp.dataTypeSize == 4) + } else if (jcp.dataTypeSize == 4) { dataTypeShift = 2; + } if (isa == x64::avx2) { permMask8bitUni = 
permMask8bitA2; @@ -74,8 +75,9 @@ jitUniGatherKernel::jitUniGatherKernel(const jGatherConfParams& jcp) template void jitUniGatherKernel::create_ker() { auto code = x64::jit_generator::create_kernel(); - if (code != dnnl::impl::status::success) + if (code != dnnl::impl::status::success) { OPENVINO_THROW("Could not create Gather kernel. Error code: ", std::to_string(code)); + } ker_ = (decltype(ker_))jit_ker(); } @@ -134,8 +136,9 @@ void jitUniGatherKernel::generate() { if (jcp.beforeAxisSize != 1lu) { mov(regAux1, ptr[regParams + GET_OFF(beforeAxisDiff)]); uni_vmovups(vmmBeforeAxDiffB, ptr[regAux1]); - if (jcp.dataTypeSize != 1) + if (jcp.dataTypeSize != 1) { uni_vpslld(vmmBeforeAxDiffB, vmmBeforeAxDiffB, dataTypeShift); // multiply by data type size + } } if (jcp.batchDims > 0lu) { mov(regAux1, ptr[regParams + GET_OFF(srcAfterBatchSizeB)]); @@ -251,8 +254,9 @@ void jitUniGatherKernel::generate() { if (jcp.beforeAxisSize != 1lu) { mov(regAux1, ptr[regParams + GET_OFF(beforeAxisDiff)]); uni_vmovups(vmmBeforeAxDiffB, ptr[regAux1]); - if (jcp.dataTypeSize != 1) + if (jcp.dataTypeSize != 1) { uni_vpslld(vmmBeforeAxDiffB, vmmBeforeAxDiffB, dataTypeShift); // multiply by data type size + } } mov(regAux1, ptr[regParams + GET_OFF(srcAfterBatchSizeB)]); uni_vpbroadcastd(vmmSrcAfterBatchSizeB, ptr[regAux1]); @@ -341,8 +345,9 @@ void jitUniGatherKernel::normalizeRawIndices(Vmm& vRawIndices, Vmask& vpcmpgtd(kAuxMask, vmmZeros, vRawIndices); vpandn(kDstMask, kAuxMask, kDstMask); // Multiply by type size. - if (jcp.dataTypeSize > 1) + if (jcp.dataTypeSize > 1) { uni_vpslld(vRawIndices, vRawIndices, dataTypeShift); + } } template <> @@ -356,8 +361,9 @@ void jitUniGatherKernel::normalizeRawIndices(Vmm& vRawIndices, vpcmpgtd(kAuxMask, vmmAxisDim, vRawIndices); vpcmpd(kDstMask | kAuxMask, vmmZeros, vRawIndices, 2); // 2 - LE // Multiply by type size. 
- if (jcp.dataTypeSize > 1) + if (jcp.dataTypeSize > 1) { uni_vpslld(vRawIndices, vRawIndices, dataTypeShift); + } } template <> @@ -384,8 +390,9 @@ void jitUniGatherKernel::calcSrcShiftLong(Vmm* vAuxPool, bool shiftFi auto& kAuxMask0 = masksContainer[vAux1.getIdx()]; Xbyak::Label lIdxStride, lExit; - if (shiftFirst) + if (shiftFirst) { uni_vpaddd(vmmSpecIdxB, vmmSpecIdxB, vmmVecLenB); + } add(regIdxIter, vlen); cmp(regIdxIter, regSpecIdxSizeB); @@ -398,8 +405,9 @@ void jitUniGatherKernel::calcSrcShiftLong(Vmm* vAuxPool, bool shiftFi } vmovdqu(vDstShifts, ptr[regIndices + regAux1]); normalizeRawIndices(vDstShifts, kDstMask, kAuxMask0); - if (jcp.beforeAxisSize != 1lu) + if (jcp.beforeAxisSize != 1lu) { uni_vpaddd(vDstShifts, vDstShifts, vmmSrcBeforeAxisSumB); + } jmp(lExit, T_NEAR); L(lIdxStride); sub(regIdxIter, regSpecIdxSizeB); @@ -408,8 +416,9 @@ void jitUniGatherKernel::calcSrcShiftLong(Vmm* vAuxPool, bool shiftFi vpcmpgtd(vAux0, vmmSpecIdxSizeB, vmmSpecIdxB); vpandn(vAux1, vAux0, vmmSpecIdxSizeB); uni_vpsubd(vAux1, vmmSpecIdxB, vAux1); - if (jcp.batchDims > 0lu) + if (jcp.batchDims > 0lu) { uni_vpaddd(vAux1, vmmIdxBatchSumB, vAux1); + } uni_vpsubd(vmmSpecIdxB, vmmSpecIdxB, vmmSpecIdxSizeB); } else { if (jcp.batchDims > 0lu) { @@ -476,8 +485,9 @@ void jitUniGatherKernel::calcSrcShiftLong(Vmm* vAuxPool, bool auto& kAuxMask1 = masksContainer[vAux1.getIdx() + 1]; Xbyak::Label lIdxStride, lExit; - if (shiftFirst) + if (shiftFirst) { uni_vpaddd(vmmSpecIdxB, vmmSpecIdxB, vmmVecLenB); + } add(regIdxIter, vlen); cmp(regIdxIter, regSpecIdxSizeB); @@ -490,8 +500,9 @@ void jitUniGatherKernel::calcSrcShiftLong(Vmm* vAuxPool, bool } vmovdqu64(vDstShifts, ptr[regIndices + regAux1]); normalizeRawIndices(vDstShifts, kDstMask, kAuxMask0); - if (jcp.beforeAxisSize != 1lu) + if (jcp.beforeAxisSize != 1lu) { uni_vpaddd(vDstShifts, vDstShifts, vmmSrcBeforeAxisSumB); + } jmp(lExit, T_NEAR); L(lIdxStride); sub(regIdxIter, regSpecIdxSizeB); @@ -569,14 +580,16 @@ void 
jitUniGatherKernel::calcSrcShiftShort(Vmm* vAuxPool, bool shiftFirst) auto& vAux0 = vAuxPool[2]; if (shiftFirst) { - if (jcp.beforeAxisSize != 1lu) + if (jcp.beforeAxisSize != 1lu) { uni_vpaddd(vmmSrcBeforeAxisSumB, vmmSrcBeforeAxisSumB, vmmBeforeAxDiffB); + } // No sense to permute if specIdxSize is one of {1, 2, 4, 8, 16}. 0 is reserved for dynamic case. if (jcp.specIdxSize != 1 && jcp.specIdxSize != 2 && jcp.specIdxSize != 4 && jcp.specIdxSize != 8 && jcp.specIdxSize != 16) { vpermd(vmmSpecIdxB, vmmPermIdxMask, vmmSpecIdxB); - if (jcp.beforeAxisSize != 1lu) + if (jcp.beforeAxisSize != 1lu) { vpermd(vmmBeforeAxDiffB, vmmPermIdxMask, vmmBeforeAxDiffB); + } } } @@ -599,8 +612,9 @@ void jitUniGatherKernel::calcSrcShiftShort(Vmm* vAuxPool, bool shiftFirst) auto& kAuxMask0 = masksContainer[vAux0.getIdx()]; normalizeRawIndices(vDstShifts, kDstMask, kAuxMask0); - if (jcp.beforeAxisSize != 1lu) + if (jcp.beforeAxisSize != 1lu) { uni_vpaddd(vDstShifts, vDstShifts, vmmSrcBeforeAxisSumB); + } } // Requires vAuxPool length 4. 
@@ -623,8 +637,9 @@ void jitUniGatherKernel::calcSrcShiftShortBlock(Vmm* vAuxPool, bool shiftFi if (jcp.afterAxisSize != 1 && jcp.afterAxisSize != 2 && jcp.afterAxisSize != 4 && jcp.afterAxisSize != 8 && jcp.afterAxisSize != 16) { vpermd(vmmAfterAxisIdxB, vmmAfterAxisPermMask, vmmAfterAxisIdxB); - if (jcp.specIdxSize != 1) + if (jcp.specIdxSize != 1) { vpermd(vmmSpecIdxDiff, vmmAfterAxisPermMask, vmmSpecIdxDiff); + } } if (jcp.beforeAxisSize != 1lu) { @@ -633,8 +648,9 @@ void jitUniGatherKernel::calcSrcShiftShortBlock(Vmm* vAuxPool, bool shiftFi uni_vpaddd(vmmSrcBeforeAxisSumB, vmmSrcBeforeAxisSumB, vmmBeforeAxDiffB); uni_vmovups(vAux1, vmmSrcBeforeAxisSumB); if (specIdxAndAfterAxisSize != 1 && specIdxAndAfterAxisSize != 2 && specIdxAndAfterAxisSize != 4 && - specIdxAndAfterAxisSize != 8 && specIdxAndAfterAxisSize != 16) + specIdxAndAfterAxisSize != 8 && specIdxAndAfterAxisSize != 16) { vpermd(vmmBeforeAxDiffB, vmmBeforeAxPermMask, vmmBeforeAxDiffB); + } } else { Xbyak::Label lBeforeAxStep, lBeforeAxStepEnd; add(rSpecIdxAndAfterAxIterB, idxElPerVec * jcp.dataTypeSize); @@ -713,8 +729,9 @@ void jitUniGatherKernel::calcSrcShiftShortBlock(Vmm* vAuxPool, bool shiftFi vpmulld(vDstShifts, vDstShifts, vmmAfterAxisSize); uni_vpaddd(vDstShifts, vDstShifts, vmmAfterAxisIdxB); } - if (jcp.beforeAxisSize != 1lu) + if (jcp.beforeAxisSize != 1lu) { uni_vpaddd(vDstShifts, vDstShifts, vAux1); + } } template @@ -722,12 +739,13 @@ void jitUniGatherKernel::process(bool isShortIdx, bool blocked) { Xbyak::Label lTailProc, lEndProc; cmp(regWorkAmount, dataElPerVec); jl(lTailProc, T_NEAR); - if (jcp.dataTypeSize == 4) + if (jcp.dataTypeSize == 4) { process32b(isShortIdx, blocked); - else if (jcp.dataTypeSize == 2) + } else if (jcp.dataTypeSize == 2) { process16b(isShortIdx, blocked); - else if (jcp.dataTypeSize == 1) + } else if (jcp.dataTypeSize == 1) { process8b(isShortIdx, blocked); + } jmp(lEndProc, T_NEAR); L(lTailProc); tail(isShortIdx, false, blocked); @@ -1018,8 +1036,9 @@ void 
jitUniGatherKernel::fillRestWorkMask(Vmask& kDstMask, cmp(rAux0, 0); je(lEnd, T_NEAR); - if (i % 4 == 0) + if (i % 4 == 0) { uni_vmovups(xmmAux, xmmZeros); + } vpinsrd(xmmAux, xmmAux, rOnes, i % 4); vinserti128(kDstMask, kDstMask, xmmAux, i / 4); @@ -1036,21 +1055,23 @@ void jitUniGatherKernel::storeVectorPart(const Xbyak::Reg64& rDst, Xbyak::Label lEnd; Xbyak::Xmm xAux(vAux.getIdx()); for (size_t j = 0; j < vlen / vlenXmm; j++) { - if (isa == x64::avx2) + if (isa == x64::avx2) { vextracti128(xAux, vmmSrc, j); - else if (isa == x64::avx512_core) + } else if (isa == x64::avx512_core) { vextracti64x2(xAux, vmmSrc, j); + } for (int k = 0; k < 4; k++) { cmp(rToStoreCounter, 0); jle(lEnd, T_NEAR); - if (jcp.dataTypeSize == 4) + if (jcp.dataTypeSize == 4) { uni_vpextrd(ptr[rDst], xAux, k); - else if (jcp.dataTypeSize == 2) + } else if (jcp.dataTypeSize == 2) { uni_vpextrw(ptr[rDst], xAux, k * 2); - else if (jcp.dataTypeSize == 1) + } else if (jcp.dataTypeSize == 1) { uni_vpextrb(ptr[rDst], xAux, k * 4); + } add(rDst, jcp.dataTypeSize); sub(rToStoreCounter, 1); @@ -1074,9 +1095,10 @@ void jitUniGatherKernel::fillVlenVector() { template bool jitUniGatherKernel::isSupportedConfiguration(uint64_t afterAxisSize) { if (!jcp.dynamicShapes && afterAxisSize <= idxElPerVec) { - if (afterAxisSize > 1 && isa == x64::avx2 && (jcp.dataTypeSize == 1 || jcp.dataTypeSize == 2)) + if (afterAxisSize > 1 && isa == x64::avx2 && (jcp.dataTypeSize == 1 || jcp.dataTypeSize == 2)) { // There are no enough registers for these cases. 
return false; + } return true; } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp index dd135fb3b7aabc..3a7e9f68049191 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp @@ -15,17 +15,19 @@ namespace kernel { template GridSampleKernel::GridSampleKernel(const GridSampleKernelConfParams& jcp) : GridSampleKernelBase(jit_name(), jcp, isa, x64::cpu_isa_traits::vlen) { - if (dataTypeSize == 2) + if (dataTypeSize == 2) { dataTypeShift = 1; - else if (dataTypeSize == 4) + } else if (dataTypeSize == 4) { dataTypeShift = 2; + } } template void GridSampleKernel::create_ker() { auto code = x64::jit_generator::create_kernel(); - if (code != dnnl::impl::status::success) + if (code != dnnl::impl::status::success) { OPENVINO_THROW("Could not create GridSample kernel. Error code: ", std::to_string(code)); + } ker_ = (decltype(ker_))jit_ker(); } @@ -309,8 +311,9 @@ void GridSampleKernel::tail() { denormalizeRawCoordinates(vWCoord, vHCoord); interpolation(vWCoord, vHCoord, true); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { sal(regWorkAmount, dataTypeShift); // Multiply by source data type size. + } add(regDst, regWorkAmount); L(lEnd); @@ -429,7 +432,7 @@ void GridSampleKernel::getTailCoordinates(const Vmm& vHCoord, jle(lEnd, T_NEAR); fillRestWorkMask(kTailMask, rAux); - uni_vmovups((Vmm)vAux | kTailMask, ptr[regGrid]); + uni_vmovups(Vmm(vAux) | kTailMask, ptr[regGrid]); vpermd(vAux, vGridPermMask, vAux); Xbyak::Ymm ymmAux(vAux.getIdx()); vshuff64x2(vWCoord, vWCoord, vAux, 0B01000100); // Extract X component @@ -446,8 +449,9 @@ void GridSampleKernel::getTailCoordinates(const Vmm& vHCoord, } L(lGridShift); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { sal(rAux, dataTypeShift); // Multiply by source data type size. 
+ } add(regGrid, rAux); L(lEnd); @@ -502,8 +506,9 @@ void GridSampleKernel::getTailCoordinates(const Vmm& vHCoord, const V } L(lGridShift); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { sal(rAux, dataTypeShift); // Multiply by source data type size. + } add(regGrid, rAux); L(lEnd); @@ -524,10 +529,11 @@ void GridSampleKernel::getTailCoordinates(const Vmm& vHCoord, const Vm cmp(rGridRest, 0); jle(lEnd, T_NEAR); - if (gridTypeSize == 4) + if (gridTypeSize == 4) { pinsrd(i % 2 == 0 ? xmmWCoord : xmmHCoord, ptr[regGrid], i / 2); - else if (gridTypeSize == 2) + } else if (gridTypeSize == 2) { pinsrw(i % 2 == 0 ? xmmWCoord : xmmHCoord, ptr[regGrid], i / 2); + } add(regGrid, gridTypeSize); dec(rGridRest); @@ -543,10 +549,11 @@ void GridSampleKernel::getTailCoordinates(const Vmm& vHCoord, const Vm cmp(rGridRest, 0); jle(lLoop2End, T_NEAR); - if (gridTypeSize == 4) + if (gridTypeSize == 4) { pinsrd(i % 2 == 0 ? xmmWCoord : xmmHCoord, ptr[regGrid], i / 2); - else if (gridTypeSize == 2) + } else if (gridTypeSize == 2) { pinsrw(i % 2 == 0 ? xmmWCoord : xmmHCoord, ptr[regGrid], i / 2); + } add(regGrid, gridTypeSize); dec(rGridRest); @@ -600,8 +607,9 @@ void GridSampleKernel::getTailCoordinates(const Vmm& vHCoord, const } L(lGridShift); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { sal(rAux, dataTypeShift); // Multiply by source data type size. + } add(regGrid, rAux); L(lEnd); @@ -877,8 +885,9 @@ void GridSampleKernel::reflectionPadding(const Vmm& vCoordDst, } else { const auto& vSrcDimMul2F = dim == coord::w ? 
vSrcWidthMul2F : vSrcHeightMul2F; // (x % D2 + D2) % D2 - if (vCoordDst.getIdx() != vCoordOrigin.getIdx()) + if (vCoordDst.getIdx() != vCoordOrigin.getIdx()) { uni_vmovups(vCoordDst, vCoordOrigin); + } uni_vdivps(vAux, vCoordDst, vSrcDimMul2F); uni_vroundps(vAux, vAux, 0x3); // Truncation uni_vfnmadd231ps(vCoordDst, vAux, vSrcDimMul2F); // x % D2 @@ -950,8 +959,9 @@ void GridSampleKernel::reflectionPadding(const Vmm& vCoordDst, const Vmm& v uni_vsubps(vAux0, vCoordDst, vMul2Sub1); // abs(x) % D21 - D21 } else { // x' = (x % D2 + D2) % D2 - D21 - if (vCoordDst.getIdx() != vCoordOrigin.getIdx()) + if (vCoordDst.getIdx() != vCoordOrigin.getIdx()) { uni_vmovups(vCoordDst, vCoordOrigin); + } Vmm vMul2; if (dim == coord::w) { if (vSrcWidthMul2F.isInitialized()) { @@ -1242,10 +1252,11 @@ void GridSampleKernel::nearestInterpolation(const Vmm& vWCoord, const Vmm& } if (jcp.paddingMode == GridSamplePaddingMode::ZEROS) { - if (isa == x64::avx512_core && tail) + if (isa == x64::avx512_core && tail) { uni_kandd(kAuxMask, kTailMask, kGatherMask); - else + } else { uni_kmovd(kAuxMask, kGatherMask); + } } if (!tail) { @@ -1542,8 +1553,9 @@ void GridSampleKernel::bilinearInterpolation(const Vmm& vWCoord, const Vmm& // (y; x + 1) if (jcp.paddingMode == GridSamplePaddingMode::ZEROS) { uni_vpaddd(shift10, shift00, ptr[rTypeSize]); - if (isa == x64::avx2) + if (isa == x64::avx2) { uni_vmovups(vGatherMask, vMask01); + } } gatherdd(vAux, rSrcTmp, @@ -1568,8 +1580,9 @@ void GridSampleKernel::bilinearInterpolation(const Vmm& vWCoord, const Vmm& mov(rSrcWidth, ptr[regParams + GET_OFF(srcWidthB)]); uni_vpaddd(shift10, shift10, ptr[rSrcWidth]); } - if (isa == x64::avx2) + if (isa == x64::avx2) { uni_vmovups(vGatherMask, vMask11); + } } gatherdd(vAux, rSrcTmp, @@ -1584,8 +1597,9 @@ void GridSampleKernel::bilinearInterpolation(const Vmm& vWCoord, const Vmm& // (y + 1; x) if (jcp.paddingMode == GridSamplePaddingMode::ZEROS) { uni_vpsubd(shift10, shift10, ptr[rTypeSize]); - if (isa == x64::avx2) 
+ if (isa == x64::avx2) { uni_vmovups(vGatherMask, vMask10); + } } gatherdd(vQ1, rSrcTmp, @@ -1724,8 +1738,9 @@ void GridSampleKernel::bicubicInterpolation(const Vmm& vWCoord uni_vaddps(vSrcShift, vSrcShift0, vSrcShift); } uni_vcvtps2dq(vSrcShift, vSrcShift); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { uni_vpslld(vSrcShift, vSrcShift, dataTypeShift); + } gatherdd(vAux, rSrcTmp, vSrcShift, kAuxMask, useMask, zeroFill); if (jcp.inDataPrc == ov::element::i32) { uni_vcvtdq2ps(vAux, vAux); @@ -1748,8 +1763,9 @@ void GridSampleKernel::bicubicInterpolation(const Vmm& vWCoord uni_vaddps(vSrcShift, vSrcShift0, vSrcShift); } uni_vcvtps2dq(vSrcShift, vSrcShift); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { uni_vpslld(vSrcShift, vSrcShift, dataTypeShift); + } gatherdd(vAux, rSrcTmp, vSrcShift, kAuxMask, useMask, zeroFill); if (jcp.inDataPrc == ov::element::i32) { uni_vcvtdq2ps(vAux, vAux); @@ -2049,8 +2065,9 @@ void GridSampleKernel::bicubicInterpolation(const Vmm& vWCoord, const Vmm& template void GridSampleKernel::dataTypeShiftPs2Dq(const Vmm& vDst, const Vmm& vSrc) { - if (dataTypeSize == 1) + if (dataTypeSize == 1) { return; + } if (isa == x64::avx) { // vpslld works just with XMM for AVX, so use vmulps for YMM auto rAux = getReg64(); @@ -2061,8 +2078,9 @@ void GridSampleKernel::dataTypeShiftPs2Dq(const Vmm& vDst, const Vmm& vSrc) uni_vcvtps2dq(vDst, vDst); } else { uni_vcvtps2dq(vDst, vSrc); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { uni_vpslld(vDst, vDst, dataTypeShift); // multiply by source data type size. + } } } @@ -2101,8 +2119,9 @@ void GridSampleKernel::hwShiftPs2dq(const Vmm& vDst, const Vmm& vHCoord, co uni_vcvtps2dq(vDst, vDst); } else { uni_vcvtps2dq(vDst, vDst); - if (dataTypeSize > 1) + if (dataTypeSize > 1) { uni_vpslld(vDst, vDst, dataTypeShift); // multiply by source data type size. 
+ } } } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.cpp index 8c35b15500fee9..16dcadbf95bcc0 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.cpp @@ -28,8 +28,9 @@ bool isRegAllocable(int id) { template const RegType& reserveReg(jit_kernel::reg_indices& freeRegs, const registers& regs) { - if (freeRegs.empty()) + if (freeRegs.empty()) { throw std::runtime_error("No free registers"); + } const auto idx = freeRegs.back(); freeRegs.pop_back(); return regs[idx]; @@ -43,8 +44,9 @@ void freeReg(jit_kernel::reg_indices& freeRegs, const registers& regs, // if (it != freeRegs.end()) // throw std::runtime_error("Some register was freed twice"); freeRegs.emplace_back(idx); - if (freeRegs.size() > regs.size()) + if (freeRegs.size() > regs.size()) { OPENVINO_THROW("Some register was freed twice"); + } } const registers& x64regs() { @@ -235,10 +237,12 @@ ov::element::Type type2precision() { } cpu_isa_t get_current_isa() { - if (mayiuse(cpu_isa_t::avx512_core)) + if (mayiuse(cpu_isa_t::avx512_core)) { return cpu_isa_t::avx512_core; - if (mayiuse(cpu_isa_t::avx2)) + } + if (mayiuse(cpu_isa_t::avx2)) { return cpu_isa_t::avx2; + } return cpu_isa_t::sse41; } @@ -282,7 +286,7 @@ const Xbyak::Reg64& stack_frame::pointer() const { } void stack_frame::clear() const { - const size_t end = _size & ~(size_t)7u; + const size_t end = _size & ~static_cast(7u); _kernel.foreach ( 0, @@ -300,8 +304,9 @@ void stack_frame::clear() const { } const void* consts_table::store(const void* data, size_t size) { - if (size > chunk_size) + if (size > chunk_size) { throw std::runtime_error("Data size is too large"); + } const size_t capacity = _chunks.size() * chunk_size; if (size > capacity - _size) { _size = _chunks.size() * chunk_size; @@ -321,8 +326,9 @@ jit_kernel::jit_kernel(const char* name) : jit_generator(name) { 
_free_rmmregs.reserve(16); for (int reg = Operand::Code::RAX; reg <= Operand::Code::R15; ++reg) { - if (isRegAllocable(reg)) + if (isRegAllocable(reg)) { _free_x64regs.emplace_back(reg); + } _free_rmmregs.emplace_back(reg); } } @@ -400,8 +406,9 @@ void jit_kernel::free(const Zmm& reg) { void jit_kernel::postamble() { jit_generator::postamble(); for (const auto& emitter : _emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } @@ -441,17 +448,20 @@ jit_kernel::stack_frame jit_kernel::stack(size_t size, uint32_t alignment) { void jit_kernel::uni_vpermps(const Xmm& x1, const uint8_t mask[4], const Operand& op) { uint8_t imm8 = 0; - for (size_t i = 0; i < 4; ++i) + for (size_t i = 0; i < 4; ++i) { imm8 |= mask[i] << (i * 2); - if (op != x1) + } + if (op != x1) { movdqu(x1, op); + } shufps(x1, op, imm8); } void jit_kernel::uni_vpermps(const Ymm& y1, const uint8_t mask[8], const Operand& op) { int data[8]; - for (size_t i = 0; i < 8; ++i) + for (size_t i = 0; i < 8; ++i) { data[i] = mask[i]; + } auto mreg = var(); mreg = data; vpermps(y1, mreg, op); @@ -459,8 +469,9 @@ void jit_kernel::uni_vpermps(const Ymm& y1, const uint8_t mask[8], const Operand void jit_kernel::uni_vpermps(const Zmm& z1, const uint8_t mask[16], const Operand& op) { int data[16]; - for (size_t i = 0; i < 16; ++i) + for (size_t i = 0; i < 16; ++i) { data[i] = mask[i]; + } auto mreg = var(); mreg = data; vpermps(z1, mreg, op); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.hpp index c170086af70bb7..a55c3da5dbc71d 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel.hpp @@ -186,8 +186,9 @@ class if_expression { ~if_expression() { try { - if (!_is_exit_valid) + if (!_is_exit_valid) { _expr._kernel.assignL(_exit, _else); + } } catch (...) 
{ } } @@ -609,10 +610,11 @@ struct jit_kernel : public dnnl::impl::cpu::x64::jit_generator { using traits = internal::reg_traits; using reg_type = typename traits::type; const auto& res = reserve(); - if (sizeof(T) < traits::size) + if (sizeof(T) < traits::size) { movzx(res, argPtr(member)); - else + } else { mov(res, argPtr(member)); + } return {*this, internal::make_shared(res, *this)}; } @@ -621,10 +623,11 @@ struct jit_kernel : public dnnl::impl::cpu::x64::jit_generator { using traits = internal::reg_traits; using reg_type = typename traits::type; const auto& res = reserve(); - if (sizeof(T) < traits::size) + if (sizeof(T) < traits::size) { movzx(res, argPtr(member)); - else + } else { mov(res, argPtr(member)); + } return {*this, internal::make_shared(res, *this)}; } @@ -891,10 +894,11 @@ boolean_expression::boolean_expression(jit_kernel& kernel, type t, const shar template void boolean_expression::cmp(const Xbyak::Label& exit) const { - if (_rhs) + if (_rhs) { _kernel.cmp(*_lhs, *_rhs); - else + } else { _kernel.cmp(*_lhs, _rvalue); + } switch (_type) { case type::eq: { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp index c7a49ffd0feedf..8720845a8dc8e9 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp @@ -220,10 +220,12 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, if (kReadMask.getIdx() == 0) { OPENVINO_THROW("The vpgatherdd instruction cannot use the register k0 as mask."); } - if (!useMask) + if (!useMask) { kxnord(kReadMask, kReadMask, kReadMask); - if (zeroFill) + } + if (zeroFill) { uni_vpxor(v_dst, v_dst, v_dst); + } vpgatherdd(v_dst | kReadMask, ptr[rSrcPtr + vSrcShift]); } @@ -238,12 +240,14 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, vSrcShift.getIdx() == vReadMask.getIdx()) { OPENVINO_THROW("Any pair of the index, mask, or destination 
registers cannot be the same."); } - if (zeroFill) + if (zeroFill) { pxor(v_dst, v_dst); // Don't use vpxor. It zeros the rest of the YMM register. + } if (isValidIsa(x64::avx2)) { - if (!useMask) + if (!useMask) { uni_vpcmpeqd(vReadMask, vReadMask, vReadMask); + } vpgatherdd(v_dst, ptr[rSrcPtr + vSrcShift], vReadMask); } else { @@ -261,8 +265,9 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, uni_vpextrd(r32Aux, vSrcShift, i); pinsrd(v_dst, ptr[rSrcPtr + rAux], i); - if (useMask) + if (useMask) { L(lLoopNext); + } } } } @@ -278,10 +283,12 @@ void JitKernelBase::gatherdd(const Xbyak::Ymm& v_dst, OPENVINO_THROW("Any pair of the index, mask, or destination registers cannot be the same."); } if (isValidIsa(x64::avx2)) { - if (!useMask) + if (!useMask) { uni_vpcmpeqd(vReadMask, vReadMask, vReadMask); - if (zeroFill) + } + if (zeroFill) { uni_vpxor(v_dst, v_dst, v_dst); + } vpgatherdd(v_dst, ptr[rSrcPtr + vSrcShift], vReadMask); } else { @@ -292,8 +299,9 @@ void JitKernelBase::gatherdd(const Xbyak::Ymm& v_dst, vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(vSrcShift, vSrcShift, vSrcShift, 0x1); - if (useMask) + if (useMask) { vperm2f128(vReadMask, vReadMask, vReadMask, 0x1); + } } } } @@ -474,22 +482,24 @@ void JitKernelBase::load(const Xbyak::Xmm& v_dst, } const uint8_t elPerVec = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; - if (zeroFilling) + if (zeroFilling) { pxor(v_dst, v_dst); + } for (uint8_t i = 0; i < elPerVec; i++) { cmp(rLoadNum, i); jle(lEnd, T_NEAR); const size_t offset = i * typeSize; - if (typeSize == 1) + if (typeSize == 1) { pinsrb(v_dst, ptr[srcAddr.getRegExp() + offset], i); - else if (typeSize == 2) + } else if (typeSize == 2) { pinsrw(v_dst, ptr[srcAddr.getRegExp() + offset], i); - else if (typeSize == 4) + } else if (typeSize == 4) { pinsrd(v_dst, ptr[srcAddr.getRegExp() + offset], i); - else if (typeSize == 8) + } else if (typeSize == 8) { pinsrq(v_dst, ptr[srcAddr.getRegExp() + offset], i); + } } L(lEnd); } @@ -504,8 
+514,9 @@ void JitKernelBase::load(const Xbyak::Ymm& v_dst, } const size_t elPerXmm = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; - if (zeroFilling) + if (zeroFilling) { uni_vpxor(v_dst, v_dst, v_dst); + } Xbyak::Xmm xmmDst(v_dst.getIdx()); for (size_t i = 0lu; i < 2lu; i++) { @@ -518,14 +529,15 @@ void JitKernelBase::load(const Xbyak::Ymm& v_dst, jle(i == 0 ? lEnd : lPerm, T_NEAR); const size_t offset = offset0 + j * typeSize; - if (typeSize == 1) + if (typeSize == 1) { pinsrb(xmmDst, ptr[srcAddr.getRegExp() + offset], j); - else if (typeSize == 2) + } else if (typeSize == 2) { pinsrw(xmmDst, ptr[srcAddr.getRegExp() + offset], j); - else if (typeSize == 4) + } else if (typeSize == 4) { pinsrd(xmmDst, ptr[srcAddr.getRegExp() + offset], j); - else if (typeSize == 8) + } else if (typeSize == 8) { pinsrq(xmmDst, ptr[srcAddr.getRegExp() + offset], j); + } } L(lPerm); @@ -670,8 +682,9 @@ void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst, } vperm2f128(vSrcShift, vSrcShift, vSrcShift, 0x1); - if (useMask) + if (useMask) { vperm2f128(vReadMask, vReadMask, vReadMask, 0x1); + } } } L(lEnd); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.cpp index 16f41f7476249e..3e622e6ee0fe40 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.cpp @@ -287,8 +287,9 @@ repackB(Tdst* dst, ov::float16* src, int N_stride, int N, int K) { if (N == 16 && K == 32) { // SIMD optimized version ov::Extensions::Cpu::XARCH::llm_mlp_transpose_epi32_16x16(dst, src, N_stride * sizeof(Tdst)); - if (std::is_same::value) + if (std::is_same::value) { fp16_to_bf16(dst); + } return; } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.hpp index cc908dd82b6295..fb8f5b152ff35a 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.hpp +++ 
b/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_kernel.hpp @@ -55,10 +55,11 @@ class MKernel : public dnnl::impl::cpu::x64::jit_generator { int m_M_hint; MKernel(int M_hint, TMUL_TYPE tmul_type) : jit_generator("MKernel"), m_tmul_type(tmul_type), m_M_hint(M_hint) { - if (m_tmul_type == TMUL_TYPE::FP16 || m_tmul_type == TMUL_TYPE::BF16) + if (m_tmul_type == TMUL_TYPE::FP16 || m_tmul_type == TMUL_TYPE::BF16) { m_tile_reg_ksize = 32; - else + } else { m_tile_reg_ksize = 64; + } setup(M_hint); } @@ -198,10 +199,12 @@ struct Work { static MKernel jit_amx_bf16(BM, TMUL_TYPE::BF16); static MKernel jit_amx_f16(BM, TMUL_TYPE::FP16); static MKernel jit_amx_i8(BM, TMUL_TYPE::SSD); - if (quant_i8) + if (quant_i8) { return jit_amx_i8; - if (is_f16) + } + if (is_f16) { return jit_amx_f16; + } return jit_amx_bf16; } @@ -209,10 +212,12 @@ struct Work { static MKernel jit_amx_bf16(16, TMUL_TYPE::BF16); static MKernel jit_amx_f16(16, TMUL_TYPE::FP16); static MKernel jit_amx_i8(16, TMUL_TYPE::SSD); - if (quant_i8) + if (quant_i8) { return jit_amx_i8; - if (is_f16) + } + if (is_f16) { return jit_amx_f16; + } return jit_amx_bf16; } @@ -229,8 +234,9 @@ struct Work { auto* pw_temp = pw; for (int n = n0; n < n1; n++, pw_temp += stride_in_bytes / sizeof(Tsrc)) { float fsum = 0; - for (int k = k0; k < k1; k++) + for (int k = k0; k < k1; k++) { fsum += pw_temp[k]; + } *p_wsum_per_oc++ = fsum; } } @@ -268,14 +274,16 @@ struct Work { for (int n = n0; n < n1; n += 32) { for (int dn = 0; dn < 16; dn++, pw1_temp += stride_temp) { float fsum = 0; - for (int k = k0; k < k1; k++) + for (int k = k0; k < k1; k++) { fsum += pw1_temp[k]; + } *p_wsum_per_oc++ = fsum; } for (int dn = 0; dn < 16; dn++, pw2_temp += stride_temp) { float fsum = 0; - for (int k = k0; k < k1; k++) + for (int k = k0; k < k1; k++) { fsum += pw2_temp[k]; + } *p_wsum_per_oc++ = fsum; } } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_utils.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_utils.cpp index 
bb659f3933bda0..f0e03da9ebc66e 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/mlp_utils.cpp @@ -34,10 +34,12 @@ void llm_mlp_quantize_to_i8(T* psrc, bool asym) { auto clamp_i8 = [](float x) { auto v = static_cast(std::round(x)); - if (v < -128) + if (v < -128) { return -128; - if (v > 127) + } + if (v > 127) { return 127; + } return v; }; diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp index bd3058ac151d39..e4de7570461dec 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp @@ -294,8 +294,9 @@ template void NonMaxSuppression::suppressed_by_iou(bool is_scalar) { if (x64::mayiuse(x64::avx512_core)) { vcmpps(k_mask, vmm_temp3, vmm_iou_threshold, 0x0D); // _CMP_GE_OS. vcmpps w/ kmask only on V5 - if (is_scalar) + if (is_scalar) { kandw(k_mask, k_mask, k_mask_one); + } kortestw(k_mask, k_mask); // bitwise check if all zero } else if (x64::mayiuse(x64::avx)) { // vex instructions with xmm on avx and ymm on avx2 @@ -348,10 +349,11 @@ void NonMaxSuppression::suppressed_by_score() { template void NonMaxSuppression::iou(int ele_num) { auto load = [&](Xbyak::Reg64 reg_src, Vmm vmm_dst) { - if (ele_num != scalar_step && ele_num != vector_step) + if (ele_num != scalar_step && ele_num != vector_step) { OPENVINO_THROW("NMS JIT implementation supports load emitter with only element count scalar_step or " "vector_step! Get: ", ele_num); + } const auto& load_emitter = ele_num == 1 ? 
load_scalar_emitter : load_vector_emitter; load_emitter->emit_code({static_cast(reg_src.getIdx())}, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/rdft_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/rdft_kernel.cpp index b3df542c7a62d1..9ed8f61f49ae12 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/rdft_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/rdft_kernel.cpp @@ -42,8 +42,9 @@ void jit_dft_kernel_f32::generate() { break; } int simd_size = vlen / output_type_size; - if (kernel_type_ == complex_to_complex) + if (kernel_type_ == complex_to_complex) { simd_size = vlen / type_size; + } mov(input_ptr, ptr[param1 + GET_OFF(input)]); mov(input_size, ptr[param1 + GET_OFF(input_size)]); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp index 75afb1a62c2931..76eab0e35f471f 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp @@ -217,8 +217,9 @@ void jit_rms_kernel::generate() { this->postamble(); for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/rope_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/rope_kernel.cpp index aecf89dd51c547..77a66a7b098d97 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/rope_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/rope_kernel.cpp @@ -52,8 +52,9 @@ void jit_rotary_kernel::generate() { } this->postamble(); for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } diff --git a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp index b4e7a00c74f8fc..1fc77f2ecd66d4 100644 --- a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp +++ b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp @@ 
-190,12 +190,13 @@ class LinearGateUp { static GateUpCombine jit_gateup_silu(dnnl_eltwise_swish, std::is_same::value); static GateUpCombine jit_gateup_gelu(dnnl_eltwise_gelu_tanh, std::is_same::value); - if (config.act == LLMMLPNode::ACT_FN::GELU) + if (config.act == LLMMLPNode::ACT_FN::GELU) { jit_gateup = &jit_gateup_gelu; - else if (config.act == LLMMLPNode::ACT_FN::SILU) + } else if (config.act == LLMMLPNode::ACT_FN::SILU) { jit_gateup = &jit_gateup_silu; - else + } else { OPENVINO_THROW("unsupported act in GateUpCombine"); + } bool quantized_int8 = config.gate_up_quantized; @@ -247,17 +248,18 @@ class LinearGateUp { ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { - if (quantized_int8) + if (quantized_int8) { work.setup(wbuffer.get(ithr), reinterpret_cast(p_weight_gate), reinterpret_cast(p_weight_up), stride, true); - else + } else { work.setup(wbuffer.get(ithr), reinterpret_cast(p_weight_gate), reinterpret_cast(p_weight_up), stride); + } } }); DEBUG_LOG(" setup is done. 
weight @ ", static_cast(p_weight_gate)); @@ -379,8 +381,9 @@ struct LLMMLP::Executor : public LLMMLP::ExecutorBase { void setM(int M) { uint8_t* cur_scratch_base = nullptr; - if (m_scratchMem) + if (m_scratchMem) { cur_scratch_base = m_scratchMem->getDataAs(); + } // new M larger than previous or the scratch pointer is changed after the following allocation if (m_M < M || cur_scratch_base != m_scratch_base) { ScratchBuffAllocator allocator; @@ -508,8 +511,9 @@ LLMMLP::LLMMLP(const std::shared_ptr& op, const GraphContext::CPtr& co } void LLMMLP::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inPortConfigs; std::vector outPortConfigs; @@ -551,12 +555,13 @@ void LLMMLP::initSupportedPrimitiveDescriptors() { getInputShapeAtPort(5), false, -1); // up_weight scales per OC - if (m_mlp_config.down_quantized) + if (m_mlp_config.down_quantized) { inPortConfigs.emplace_back(LayoutType::ncsp, ov::element::f32, getInputShapeAtPort(6), false, -1); // down_weight scales per OC + } // initialize output port outPortConfigs.emplace_back(LayoutType::ncsp, rtPrecision, getOutputShapeAtPort(0), false, -1); diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp index 05981f170298d9..018e8049aa4951 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp @@ -34,28 +34,34 @@ LogSoftmax::LogSoftmax(const std::shared_ptr& op, const GraphContext:: } const auto logSoftMax = ov::as_type_ptr(op); - if (logSoftMax == nullptr) + if (logSoftMax == nullptr) { OPENVINO_THROW("Operation with name '", op->get_friendly_name(), "' is not an instance of LogSoftmax from opset5."); + } - if (inputShapes.size() != 1 || outputShapes.size() != 1) + if (inputShapes.size() != 1 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } auto dimsSize 
= getInputShapeAtPort(0).getDims().size(); - if (dimsSize == 0) + if (dimsSize == 0) { dimsSize += 1; + } axis = logSoftMax->get_axis(); - if (axis < 0) + if (axis < 0) { axis += dimsSize; + } - if (dimsSize < static_cast((size_t)(1) + axis)) + if (dimsSize < static_cast(static_cast(1) + axis)) { THROW_CPU_NODE_ERR("has incorrect input parameters dimensions and axis number!"); + } } void LogSoftmax::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}}, {{LayoutType::ncsp, ov::element::f32}}, @@ -70,17 +76,21 @@ void LogSoftmax::prepareParams() { int j = static_cast(dims.size()) - 1; for (; j >= 0; j--) { - if (dims[j] != 1) + if (dims[j] != 1) { break; + } } - if (j == axis) + if (j == axis) { isLastDim = true; + } - for (int i = 0; i < axis; i++) + for (int i = 0; i < axis; i++) { axisStep *= dims[i]; + } reducedAxisSize = dims[axis]; - for (size_t i = (axis + 1); i < dims.size(); i++) + for (size_t i = (axis + 1); i < dims.size(); i++) { reducedAxisStride *= dims[i]; + } } void LogSoftmax::executeDynamicImpl(const dnnl::stream& strm) { @@ -98,12 +108,14 @@ void LogSoftmax::execute(const dnnl::stream& strm) { float reduceProd = 0.0f; const float max = *std::max_element(srcDataPtr, srcDataPtr + reducedAxisSize); - for (size_t j = 0; j < reducedAxisSize; ++j) + for (size_t j = 0; j < reducedAxisSize; ++j) { reduceProd += expf(srcDataPtr[j] - max); + } reduceProd = logf(reduceProd); - for (size_t j = 0; j < reducedAxisSize; ++j) + for (size_t j = 0; j < reducedAxisSize; ++j) { dstDataPtr[j] = srcDataPtr[j] - max - reduceProd; + } }); } else { parallel_for2d(axisStep, reducedAxisStride, [&](size_t k, size_t i) { @@ -113,16 +125,19 @@ void LogSoftmax::execute(const dnnl::stream& strm) { float reduceProd = 0.0f; float max = std::numeric_limits::min(); for (size_t j = 0; j < reducedAxisSize; ++j) { - if (srcDataPtr[j * 
reducedAxisStride] > max) + if (srcDataPtr[j * reducedAxisStride] > max) { max = srcDataPtr[j * reducedAxisStride]; + } } - for (size_t j = 0; j < reducedAxisSize; ++j) + for (size_t j = 0; j < reducedAxisSize; ++j) { reduceProd += expf(srcDataPtr[j * reducedAxisStride] - max); + } reduceProd = logf(reduceProd); - for (size_t j = 0; j < reducedAxisSize; ++j) + for (size_t j = 0; j < reducedAxisSize; ++j) { dstDataPtr[j * reducedAxisStride] = srcDataPtr[j * reducedAxisStride] - max - reduceProd; + } }); } } diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index 95c14667964135..ce13c03d8ef1bb 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -128,17 +128,21 @@ Lrn::Lrn(const std::shared_ptr& op, const GraphContext::CPtr& context) } void Lrn::getSupportedDescriptors() { - if (!descs.empty()) + if (!descs.empty()) { return; + } - if (getParentEdges().size() != 2) + if (getParentEdges().size() != 2) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } ov::element::Type precision = getOriginalOutputPrecisionAtPort(0); - if (precision != ov::element::f32 && precision != ov::element::bf16) + if (precision != ov::element::f32 && precision != ov::element::bf16) { precision = ov::element::f32; + } auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(precision); const auto& parentShape = getInputShapeAtPort(0); @@ -163,14 +167,17 @@ std::shared_ptr Lrn::getSrcMemDesc(const dnnl::primitive_desc& prim_ void Lrn::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isDefined()) + if (!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("input memory is undefined"); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || 
!dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("destination memory is undefined"); + } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { THROW_CPU_NODE_ERR("preferable primitive descriptor did not set"); + } auto inpDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); @@ -194,8 +201,9 @@ void Lrn::prepareParams() { const bool found = DnnlExtensionUtils::find_implementation(prim_desc, key.implType); - if (!found) + if (!found) { return nullptr; + } return std::make_shared(prim_desc); }; diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.cpp b/src/plugins/intel_cpu/src/nodes/mathematics.cpp index ebb16097e6d15f..23f4511154c70f 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.cpp +++ b/src/plugins/intel_cpu/src/nodes/mathematics.cpp @@ -53,13 +53,15 @@ Math::Math(const std::shared_ptr& op, const GraphContext::CPtr& contex } void Math::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); + } addSupportedPrimDesc(inDataConf, {{LayoutType::ncsp, ov::element::f32}}, impl_desc_type::ref_any); } @@ -156,14 +158,15 @@ void Math::execute(const dnnl::stream& strm) { break; case Algorithm::MathSign: parallel_for(dataSize, [&](size_t i) { - if (src_data[i] > 0.0f) + if (src_data[i] > 0.0f) { dst_data[i] = 1.0f; - else if (src_data[i] < 0.0f) + } else if (src_data[i] < 0.0f) { dst_data[i] = -1.0f; - else if (std::isnan(src_data[i])) + } else if (std::isnan(src_data[i])) { dst_data[i] = src_data[i]; - else + } else { dst_data[i] = 0.0f; + } }); break; case Algorithm::MathSin: diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp 
index 20c5dcf1040e71..d7fe2c4d4895da 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -117,8 +117,9 @@ MatMul::MatMul(const std::shared_ptr& op, const GraphContext::CPtr& co withBiases(false) { std::string errorMessage; - if (!isSupportedOperation(op, errorMessage)) + if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + } const auto matMul = ov::as_type_ptr(op); @@ -157,8 +158,9 @@ bool MatMul::canFuse(const NodePtr& node) const { // onednn primitive support U8 output with floating input. if (node->getType() == Type::FakeQuantize && one_of(node->getOriginalOutputPrecisionAtPort(0), ov::element::i8, ov::element::u8) && !canBeExecutedInInt8() && - getOriginalInputPrecisionAtPort(0) == ov::element::f32) + getOriginalInputPrecisionAtPort(0) == ov::element::f32) { return false; + } return canFuseSimpleOperation(node); } @@ -267,10 +269,12 @@ dnnl::memory::desc MatMul::getBiasDescFrom(const DnnlMemoryDescCPtr& outMemDesc) } void MatMul::getSupportedDescriptors() { - if (getParentEdges().size() != getOriginalInputsNumber()) + if (getParentEdges().size() != getOriginalInputsNumber()) { THROW_CPU_NODE_ERR("has incorrect number of input edges for layer ", getName()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges for layer ", getName()); + } withBiases = getOriginalInputsNumber() == 3; @@ -278,8 +282,9 @@ void MatMul::getSupportedDescriptors() { auto secondInPortPrec = getOriginalInputPrecisionAtPort(1); auto outPortPrec = getOriginalOutputPrecisionAtPort(0); - if (firstInPortPrec.size() != secondInPortPrec.size()) + if (firstInPortPrec.size() != secondInPortPrec.size()) { firstInPortPrec = secondInPortPrec = getMaxPrecision(getOriginalInputPrecisions()); + } // fallback to fp32 for any precision that cannot be handled natively if ((!one_of(firstInPortPrec, @@ -315,8 +320,9 @@ void 
MatMul::getSupportedDescriptors() { const auto& inputShape1 = getInputShapeAtPort(1); auto outputShape = getOutputShapeAtPort(0); - if (inputShape0.getRank() != inputShape1.getRank() || inputShape0.getRank() != outputShape.getRank()) + if (inputShape0.getRank() != inputShape1.getRank() || inputShape0.getRank() != outputShape.getRank()) { THROW_CPU_NODE_ERR("has invalid dims count"); + } const int nDims = inputShape0.getRank(); const auto xAxis = nDims - 1; @@ -332,8 +338,9 @@ void MatMul::getSupportedDescriptors() { // coverity[copy_paste_error] if (!dimsEqualWeak(inDims0[xAxis0], inDims1[yAxis1]) || !dimsEqualWeak(inDims0[yAxis0], outDims[yAxis]) || - !dimsEqualWeak(inDims1[xAxis1], outDims[xAxis])) + !dimsEqualWeak(inDims1[xAxis1], outDims[xAxis])) { THROW_CPU_NODE_ERR("has incorrect spatial input and output dimensions"); + } for (int dim_idx = nDims - 3; dim_idx >= 0; dim_idx--) { if ((!dimsEqualWeak(inDims0[dim_idx], outDims[dim_idx]) && !dimsEqualWeak(inDims0[dim_idx], 1)) || @@ -474,8 +481,9 @@ void MatMul::createDescriptor(const std::vector& inputDesc, } void MatMul::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto addSupportedPrimitiveDescriptor = [&](const dnnl::primitive_desc& prim_desc) { std::vector inConfs, outConfs; @@ -535,20 +543,22 @@ void MatMul::initSupportedPrimitiveDescriptors() { // fallback. if none of the primitive types is present in the priority list just add first implementation // @todo this fallback is not necessary if primitive priority list is filled correctly - if (supportedPrimitiveDescriptors.empty()) + if (supportedPrimitiveDescriptors.empty()) { addSupportedPrimitiveDescriptor(first_desc); + } } } MemoryDescPtr MatMul::getSrcMemDesc(const dnnl::primitive_desc& prim_desc, size_t idx) const { auto desc = idx > 0 ? 
prim_desc.weights_desc(idx - 1) : prim_desc.src_desc(idx); - if (idx < 2) // inputs + if (idx < 2) { // inputs return std::make_shared( DnnlExtensionUtils::DataTypeToElementType(desc.get_data_type()), getInputShapeAtPort(idx)); /* provide initial shapes, so hide transpose effect */ - else // bias + } else { // bias return DnnlExtensionUtils::makeDescriptor(desc); + } } bool MatMul::created() const { @@ -563,10 +573,12 @@ void MatMul::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(0); auto src0MemPtr = getSrcMemoryAtPort(0); auto src1MemPtr = getSrcMemoryAtPort(1); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined destination memory"); - if (!src0MemPtr || !src0MemPtr->isDefined() || !src1MemPtr || !src1MemPtr->isDefined()) + } + if (!src0MemPtr || !src0MemPtr->isDefined() || !src1MemPtr || !src1MemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory"); + } // check for a degenerate case. In this context the degenerate case is a matrix multiplication where the // collapsing dimension is zero, e.g., AB=C, where A has the shape [10, 0] and B has the shape [0, 20], @@ -583,8 +595,9 @@ void MatMul::prepareParams() { } const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { THROW_CPU_NODE_ERR("did not set preferable primitive descriptor"); + } DnnlMemoryDescPtr src0TransposedDesc; DnnlMemoryDescPtr src1TransposedDesc; @@ -615,8 +628,9 @@ void MatMul::prepareParams() { DnnlMemoryDescPtr dnnlBiasMemDesc = nullptr; if (withBiases) { auto biasMemory = getSrcMemoryAtPort(2); - if (!biasMemory || !biasMemory->isDefined()) + if (!biasMemory || !biasMemory->isDefined()) { THROW_CPU_NODE_ERR("has undefined bias memory"); + } dnnlBiasMemDesc = biasMemory->getDescWithType(); } @@ -650,8 +664,9 @@ void MatMul::prepareParams() { auto first_desc = dnnl::matmul::primitive_desc(prim_desc.get()); const bool found = 
DnnlExtensionUtils::find_implementation(prim_desc, key.implType); - if (found) + if (found) { return std::make_shared(prim_desc); + } // In case of dynamic shapes an implementation type chosen as optimal for a primitive_desc with // undefined input shapes, is not necessarily available for the primitive_desc with defined shape. @@ -674,8 +689,9 @@ void MatMul::prepareParams() { primArgs[DNNL_ARG_SRC_0] = src0MemPtr->getPrimitive(); primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->getPrimitive(); primArgs[DNNL_ARG_DST] = dstMemPtr->getPrimitive(); - if (withBiases) + if (withBiases) { primArgs[DNNL_ARG_BIAS] = getSrcMemoryAtPort(2)->getPrimitive(); + } appendPostOpArgs(*attr, primArgs, postOpsArgs); #ifdef CPU_DEBUG_CAPS diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp index da8ed5cf60c3a3..d6c285f8d77fac 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp @@ -54,29 +54,35 @@ MatrixNms::MatrixNms(const std::shared_ptr& op, const GraphContext::CP OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (one_of(op->get_type_info(), ov::op::internal::NmsStaticShapeIE::get_type_info_static())) + if (one_of(op->get_type_info(), + ov::op::internal::NmsStaticShapeIE::get_type_info_static())) { m_outStaticShape = true; + } - if (getOriginalInputsNumber() != 2) + if (getOriginalInputsNumber() != 2) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getOriginalInputsNumber()); + } - if (getOriginalOutputsNumber() != 3) + if (getOriginalOutputsNumber() != 3) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getOriginalOutputsNumber()); + } const auto matrix_nms = ov::as_type_ptr(op); auto& attrs = matrix_nms->get_attrs(); - if (attrs.sort_result_type == ov::op::v8::MatrixNms::SortResultType::CLASSID) + if (attrs.sort_result_type == ov::op::v8::MatrixNms::SortResultType::CLASSID) { m_sortResultType = MatrixNmsSortResultType::CLASSID; - else if 
(attrs.sort_result_type == ov::op::v8::MatrixNms::SortResultType::SCORE) + } else if (attrs.sort_result_type == ov::op::v8::MatrixNms::SortResultType::SCORE) { m_sortResultType = MatrixNmsSortResultType::SCORE; - else if (attrs.sort_result_type == ov::op::v8::MatrixNms::SortResultType::NONE) + } else if (attrs.sort_result_type == ov::op::v8::MatrixNms::SortResultType::NONE) { m_sortResultType = MatrixNmsSortResultType::NONE; + } - if (attrs.decay_function == ov::op::v8::MatrixNms::DecayFunction::GAUSSIAN) + if (attrs.decay_function == ov::op::v8::MatrixNms::DecayFunction::GAUSSIAN) { m_decayFunction = GAUSSIAN; - else if (attrs.decay_function == ov::op::v8::MatrixNms::DecayFunction::LINEAR) + } else if (attrs.decay_function == ov::op::v8::MatrixNms::DecayFunction::LINEAR) { m_decayFunction = LINEAR; + } m_sortResultAcrossBatch = attrs.sort_result_across_batch; m_scoreThreshold = attrs.score_threshold; @@ -98,18 +104,22 @@ MatrixNms::MatrixNms(const std::shared_ptr& op, const GraphContext::CP } const auto& boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims(); - if (boxes_dims.size() != 3) + if (boxes_dims.size() != 3) { THROW_CPU_NODE_ERR("has unsupported 'boxes' input rank: ", boxes_dims.size()); - if (boxes_dims[2] != 4) + } + if (boxes_dims[2] != 4) { THROW_CPU_NODE_ERR("has unsupported 'boxes' input 3rd dimension size: ", boxes_dims[2]); + } const auto& scores_dims = getInputShapeAtPort(NMS_SCORES).getDims(); - if (scores_dims.size() != 3) + if (scores_dims.size() != 3) { THROW_CPU_NODE_ERR("has unsupported 'scores' input rank: ", scores_dims.size()); + } } void MatrixNms::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const std::vector supportedFloatPrecision = {ov::element::f32, ov::element::f16}; const std::vector supportedIntOutputPrecision = {ov::element::i32, ov::element::i64}; @@ -242,8 +252,9 @@ size_t MatrixNms::nmsMatrix(const float* boxesData, minDecay = 
std::min(minDecay, decay); } auto ds = minDecay * scoresData[candidateIndex[i]]; - if (ds <= m_postThreshold) + if (ds <= m_postThreshold) { continue; + } auto boxIndex = candidateIndex[i]; auto box = boxesData + boxIndex * 4; filterBoxes[numDet].box.x1 = box[0]; @@ -275,14 +286,16 @@ void MatrixNms::prepareParams() { size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : static_cast(m_backgroundClass) < m_numClasses ? m_numClasses - 1 : m_numClasses; - if (m_nmsTopk >= 0) + if (m_nmsTopk >= 0) { max_output_boxes_per_class = std::min(m_numBoxes, static_cast(m_nmsTopk)); - else + } else { max_output_boxes_per_class = m_numBoxes; + } m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes; - if (m_keepTopk >= 0) + if (m_keepTopk >= 0) { m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast(m_keepTopk)); + } m_realNumClasses = real_num_classes; m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast(m_numBoxes)); @@ -295,8 +308,9 @@ void MatrixNms::prepareParams() { m_classOffset.resize(m_numClasses, 0); for (size_t i = 0, count = 0; i < m_numClasses; i++) { - if (i == static_cast(m_backgroundClass)) + if (i == static_cast(m_backgroundClass)) { continue; + } m_classOffset[i] = (count++) * m_realNumBoxes; } } @@ -338,7 +352,7 @@ void MatrixNms::execute(const dnnl::stream& strm) { size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; BoxInfo* batchFilteredBox = m_filteredBoxes.data() + batchOffset; auto& numPerClass = m_numPerBatchClass[batchIdx]; - auto numDet = std::accumulate(numPerClass.begin(), numPerClass.end(), int64_t(0)); + auto numDet = std::accumulate(numPerClass.begin(), numPerClass.end(), static_cast(0)); auto start_offset = numPerClass[0]; for (size_t i = 1; i < numPerClass.size(); i++) { @@ -350,8 +364,9 @@ void MatrixNms::execute(const dnnl::stream& strm) { } auto keepNum = numDet; if (m_keepTopk > -1) { - if (keepNum > m_keepTopk) + if (keepNum > m_keepTopk) { keepNum = m_keepTopk; + } 
} std::partial_sort( @@ -406,14 +421,15 @@ void MatrixNms::execute(const dnnl::stream& strm) { // NMS-alike nodes are always transformed to NMSIEInternal node in case of legacy api, for compatibility. // And on the other hand in case of api 2.0, keep them internal dynamic for better performance and functionality. if (!m_outStaticShape) { - size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), size_t(0)); + size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), static_cast(0)); redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}}); } float* selectedOutputs = selectedOutputsMemPtr->getDataAs(); int* selectedIndices = selectedIndicesMemPtr->getDataAs(); int* validOutputs = validOutputsMemPtr->getDataAs(); - for (size_t i = 0; i < m_numPerBatch.size(); i++) + for (size_t i = 0; i < m_numPerBatch.size(); i++) { validOutputs[i] = static_cast(m_numPerBatch[i]); + } int64_t outputOffset = 0; int64_t originalOffset = 0; @@ -447,8 +463,9 @@ void MatrixNms::checkPrecision(const ov::element::Type prec, const std::vector& precList, const std::string& name, const std::string& type) { - if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) { THROW_CPU_NODE_ERR("has unsupported '", name, "' ", type, " precision: ", prec); + } } } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 8b29ac8cbfbadb..fb643197eb611c 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -166,8 +166,9 @@ MemoryInputBase& MemoryOutputBase::getInputNode() { void MemoryOutputBase::getSupportedDescriptors() {} void MemoryOutputBase::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto&& shape = getInputShapeAtPort(0); auto precision = 
getOriginalInputPrecisionAtPort(0); @@ -458,8 +459,9 @@ MemoryOutputBase& MemoryInputBase::getOutputNode() { } void MemoryInputBase::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto precision = getOriginalOutputPrecisionAtPort(0); auto&& descCreators = ov::intel_cpu::BlockedDescCreator::getCommonCreators(); @@ -522,8 +524,9 @@ void MemoryStatesRegister::registerOutput(MemoryOutputBase* node) { } void MemoryStatesRegister::remove(MemoryNode* node) { - if (nullptr == node) + if (nullptr == node) { return; + } ov::util::erase_if(memory_inputs, [&](const InputNodesMap::value_type& it) { return it.second == node; }); @@ -952,8 +955,9 @@ MemStatePtr MemoryInputSDPA::makeState() const { OPENVINO_ASSERT(node); auto kv_precision = node->getKVCachePrecision(); VectorDims order = {2, 0, 1, 3}; - if (!node->getKVCacheOrder().empty()) + if (!node->getKVCacheOrder().empty()) { order = node->getKVCacheOrder(); + } auto internal_desc = ArbitraryOrderDescCreator(order).createSharedDesc(kv_precision, outputShapes.at(0)); diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index 43867cd99b2b01..44efb3779589c3 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -164,8 +164,9 @@ struct jit_mul_add_softmax_kernel : public jit_uni_mul_add_softmax_kernel, publi vbroadcastss(get_vmm_denom(0), xmm_tmp); uni_vdivps(get_vmm_denom(0), get_vmm_denom(0), get_vmm_aux(0)); - if (jcp_.with_scales1) + if (jcp_.with_scales1) { mov(reg_scales, ptr[reg_params + GET_OFF(p_scales1)]); + } if (jcp_.with_scales1 && jcp_.broadcast_scales1) { uni_vmovss(Xmm(vmm_scales.getIdx()), ptr[reg_scales]); @@ -192,8 +193,9 @@ struct jit_mul_add_softmax_kernel : public jit_uni_mul_add_softmax_kernel, publi this->postamble(); for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { 
emitter.second->emit_data(); + } } exp_emitter->emit_data(); @@ -455,8 +457,9 @@ struct jit_convert_reorder_kernel : public jit_uni_convert_reorder_kernel, publi this->postamble(); for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } @@ -620,8 +623,9 @@ struct jit_convert_transpose_kernel : public jit_uni_convert_transpose_kernel, p this->postamble(); for (const auto& emitter : emitters) { - if (emitter.second) + if (emitter.second) { emitter.second->emit_data(); + } } } @@ -746,8 +750,9 @@ bool MHA::isSupportedOperation(const std::shared_ptr& op, std::s supportedPrecisions = false; } } else { - if (mha->get_fq0_output_type() != mha->get_input_element_type(0)) + if (mha->get_fq0_output_type() != mha->get_input_element_type(0)) { supportedPrecisions = false; + } } if (!mha->get_fq_scales1().empty() && mha->get_fq1_output_type() != element::i8) { @@ -814,8 +819,9 @@ MHA::MHA(const std::shared_ptr& op, const GraphContext::CPtr& context) } void MHA::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } for (auto idx : {0, 1, 2, 3}) { inputPrecisions.push_back(getOriginalInputPrecisionAtPort(idx)); @@ -832,8 +838,9 @@ void MHA::initSupportedPrimitiveDescriptors() { inputPrecisions[2] = ov::element::f32; - if (inputPrecisions[3] == ov::element::i8 && fqScales2.empty()) + if (inputPrecisions[3] == ov::element::i8 && fqScales2.empty()) { inputPrecisions[3] = ov::element::f32; + } outputPrecision = getOriginalOutputPrecisionAtPort(0); if (!one_of(outputPrecision, ov::element::f32, ov::element::bf16, ov::element::i8, ov::element::u8)) { @@ -969,8 +976,9 @@ void MHA::init_brgemm_copy_b(std::unique_ptr& brgCop #if defined(OPENVINO_ARCH_X86_64) auto ret = create_brgemm_matmul_copy_b(brgCopyKernel, &brgCopyKernelConf); - if (ret != dnnl::impl::status_t::dnnl_success) + if (ret != dnnl::impl::status_t::dnnl_success) { 
THROW_CPU_NODE_ERR("cannot create_brgemm_matmul_copy_b kernel, dnnl_status: ", ret); + } #endif // OPENVINO_ARCH_X86_64 } @@ -1063,8 +1071,9 @@ void MHA::prepareParams() { // don't create brgemm kernels for empty tiles if (M_ != 0 && K_ != 0 && N_ != 0) { - if (brg0BaseIdx == std::numeric_limits::max()) + if (brg0BaseIdx == std::numeric_limits::max()) { brg0BaseIdx = getBrgIdx(m, k, n); + } init_brgemm(brgemmCtx, brgKernels0[getBrgIdx(m, k, n)], brg0WithAMX); } } @@ -1131,8 +1140,9 @@ void MHA::prepareParams() { // don't create brgemm kernels for empty tiles if (M_ != 0 && K_ != 0 && N_ != 0) { - if (brg1BaseIdx == std::numeric_limits::max()) + if (brg1BaseIdx == std::numeric_limits::max()) { brg1BaseIdx = getBrgIdx(m, k, n); + } init_brgemm(brgemmCtx, brgKernels1[getBrgIdx(m, k, n)], brg1WithAMX); } @@ -1257,14 +1267,17 @@ void MHA::prepareParams() { } } - if (mulAddSoftmaxKernel) + if (mulAddSoftmaxKernel) { mulAddSoftmaxKernel->create_ker(); + } - if (convertReorderKernel) + if (convertReorderKernel) { convertReorderKernel->create_ker(); + } - if (convertTransposeKernel) + if (convertTransposeKernel) { convertTransposeKernel->create_ker(); + } const auto& selectedPD = getSelectedPrimitiveDescriptor(); if (brgemmCtx0.is_with_amx || brgemmCtx1.is_with_amx) { @@ -1301,8 +1314,9 @@ void MHA::callBrgemm(brgemmCtx& ctx, void* pout, void* wsp) { #if defined(OPENVINO_ARCH_X86_64) - if (ctx.is_with_amx) + if (ctx.is_with_amx) { amx_tile_configure(ctx.palette); + } if (ctx.is_with_comp) { brgemm_post_ops_data_t post_ops_data; brgemm_kernel_execute_postops(brgKernel.get(), 1, pin0, pin1, nullptr, pout, pout, post_ops_data, wsp); diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp index c735aea89b8660..e0d2f5b0ce058a 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp @@ -49,18 +49,22 @@ MultiClassNms::MultiClassNms(const std::shared_ptr& op, 
const GraphCon OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (one_of(op->get_type_info(), ov::op::internal::MulticlassNmsIEInternal::get_type_info_static())) + if (one_of(op->get_type_info(), ov::op::internal::MulticlassNmsIEInternal::get_type_info_static())) { m_outStaticShape = true; + } - if (getOriginalInputsNumber() != 2 && getOriginalInputsNumber() != 3) + if (getOriginalInputsNumber() != 2 && getOriginalInputsNumber() != 3) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getOriginalInputsNumber()); + } - if (getOriginalOutputsNumber() != 3) + if (getOriginalOutputsNumber() != 3) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getOriginalOutputsNumber()); + } auto nmsBase = ov::as_type_ptr(op); - if (nmsBase == nullptr) + if (nmsBase == nullptr) { THROW_CPU_NODE_ERR("is not an instance of MulticlassNmsBase."); + } auto& atrri = nmsBase->get_attrs(); m_sortResultAcrossBatch = atrri.sort_result_across_batch; m_nmsTopK = atrri.nms_top_k; @@ -68,12 +72,13 @@ MultiClassNms::MultiClassNms(const std::shared_ptr& op, const GraphCon m_scoreThreshold = atrri.score_threshold; m_backgroundClass = atrri.background_class; m_keepTopK = atrri.keep_top_k; - if (atrri.sort_result_type == ngNmsSortResultType::CLASSID) + if (atrri.sort_result_type == ngNmsSortResultType::CLASSID) { m_sortResultType = MulticlassNmsSortResultType::CLASSID; - else if (atrri.sort_result_type == ngNmsSortResultType::SCORE) + } else if (atrri.sort_result_type == ngNmsSortResultType::SCORE) { m_sortResultType = MulticlassNmsSortResultType::SCORE; - else if (atrri.sort_result_type == ngNmsSortResultType::NONE) + } else if (atrri.sort_result_type == ngNmsSortResultType::NONE) { m_sortResultType = MulticlassNmsSortResultType::NONE; + } m_nmsEta = atrri.nms_eta; m_normalized = atrri.normalized; @@ -83,30 +88,37 @@ MultiClassNms::MultiClassNms(const std::shared_ptr& op, const GraphCon const auto& scores_dims = getInputShapeAtPort(NMS_SCORES).getDims(); auto boxes_ps = 
PartialShape(boxes_dims); auto scores_ps = PartialShape(scores_dims); - if (boxes_dims.size() != 3) + if (boxes_dims.size() != 3) { THROW_CPU_NODE_ERR("has unsupported 'boxes' input rank: ", boxes_dims.size()); - if (boxes_dims[2] != 4) + } + if (boxes_dims[2] != 4) { THROW_CPU_NODE_ERR("has unsupported 'boxes' input 3rd dimension size: ", boxes_dims[2]); + } if (scores_dims.size() == 3) { - if (!boxes_ps[0].compatible(scores_ps[0]) || !boxes_ps[1].compatible(scores_ps[2])) + if (!boxes_ps[0].compatible(scores_ps[0]) || !boxes_ps[1].compatible(scores_ps[2])) { THROW_CPU_NODE_ERR("has incompatible 'boxes' and 'scores' shape ", boxes_ps, " v.s. ", scores_ps); + } } else if (scores_dims.size() == 2) { - if (op->get_type_info() == ov::op::v8::MulticlassNms::get_type_info_static()) + if (op->get_type_info() == ov::op::v8::MulticlassNms::get_type_info_static()) { THROW_CPU_NODE_ERR("has unsupported 'scores' input rank: ", scores_dims.size()); - if (!boxes_ps[0].compatible(scores_ps[0]) || !boxes_ps[1].compatible(scores_ps[1])) + } + if (!boxes_ps[0].compatible(scores_ps[0]) || !boxes_ps[1].compatible(scores_ps[1])) { THROW_CPU_NODE_ERR("has incompatible 'boxes' and 'scores' shape ", boxes_ps, " v.s. 
", scores_ps); - if (getOriginalInputsNumber() != 3) + } + if (getOriginalInputsNumber() != 3) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getOriginalInputsNumber(), " when input 'scores' is 2D."); + } } else { THROW_CPU_NODE_ERR("has unsupported 'scores' input rank: ", scores_dims.size()); } } void MultiClassNms::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const std::vector supportedFloatPrecision = {ov::element::f32, ov::element::f16, @@ -158,29 +170,33 @@ void MultiClassNms::prepareParams() { const auto shared = scores_dims.size() == 3; // bboxes shared among classes if (shared) { - if (boxes_dims[0] != scores_dims[0] || boxes_dims[1] != scores_dims[2]) + if (boxes_dims[0] != scores_dims[0] || boxes_dims[1] != scores_dims[2]) { THROW_CPU_NODE_ERR("has incompatible 'boxes' and 'scores' shape ", PartialShape(boxes_dims), " v.s. ", PartialShape(scores_dims)); + } } else if (scores_dims.size() == 2) { - if (boxes_dims[0] != scores_dims[0] || boxes_dims[1] != scores_dims[1]) + if (boxes_dims[0] != scores_dims[0] || boxes_dims[1] != scores_dims[1]) { THROW_CPU_NODE_ERR("has incompatible 'boxes' and 'scores' shape ", PartialShape(boxes_dims), " v.s. ", PartialShape(scores_dims)); - if (!has_roinum) + } + if (!has_roinum) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getOriginalInputsNumber(), " when input 'scores' is 2D."); + } } else { THROW_CPU_NODE_ERR("has unsupported 'scores' input rank: ", scores_dims.size()); } if (has_roinum) { const auto& roisnum_dims = getParentEdgeAt(NMS_ROISNUM)->getMemory().getStaticDims(); - if (roisnum_dims.size() != 1) + if (roisnum_dims.size() != 1) { THROW_CPU_NODE_ERR("has unsupported 'roisnum' input rank: ", roisnum_dims.size()); + } m_numBatches = shared ? 
boxes_dims[0] : roisnum_dims[0]; } else { m_numBatches = boxes_dims[0]; @@ -199,8 +215,9 @@ void MultiClassNms::prepareParams() { m_nmsRealTopk = max_output_boxes_per_class; m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes; - if (m_keepTopK >= 0) + if (m_keepTopK >= 0) { m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast(m_keepTopK)); + } m_numFiltBox.resize(m_numBatches); // number of rois after nms for each class in each image for (auto& numPerBatch : m_numFiltBox) { @@ -228,8 +245,9 @@ void MultiClassNms::execute(const dnnl::stream& strm) { auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims(); auto dims_scores = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims(); - if (m_nmsRealTopk == 0) + if (m_nmsRealTopk == 0) { return; + } const bool has_roinum = getOriginalInputsNumber() == 3; const auto shared = dims_scores.size() == 3; // bboxes shared among classes @@ -268,8 +286,9 @@ void MultiClassNms::execute(const dnnl::stream& strm) { batchOffsetNew += m_numFiltBox[b][c]; } m_numBoxOffset[b] = batchOffsetNew; - if (b == 0) + if (b == 0) { m_numBoxOffset[b] += m_numFiltBox[0][0]; + } } // sort element before go through keep_top_k parallel_sort( @@ -362,7 +381,7 @@ void MultiClassNms::execute(const dnnl::stream& strm) { } if (!m_outStaticShape) { - size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), size_t(0)); + size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), static_cast(0)); redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}}); } int* selected_indices = selectedIndicesMemPtr->getDataAs(); @@ -444,8 +463,9 @@ float MultiClassNms::intersectionOverUnion(const float* boxesI, const float* box float areaI = (ymaxI - yminI + norm) * (xmaxI - xminI + norm); float areaJ = (ymaxJ - yminJ + norm) * (xmaxJ - xminJ + norm); - if (areaI <= 0.f || areaJ <= 0.f) + if (areaI <= 0.f || areaJ <= 0.f) { return 0.f; + } float intersection_area = 
(std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ) + norm, 0.f) * (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ) + norm, 0.f); @@ -484,8 +504,9 @@ void MultiClassNms::nmsWithEta(const float* boxes, std::priority_queue, decltype(less)> sorted_boxes(less); int cur_numBoxes = shared ? m_numBoxes : roisnum[batch_idx]; for (int box_idx = 0; box_idx < cur_numBoxes; box_idx++) { - if (scoresPtr[box_idx] >= m_scoreThreshold) // algin with ref + if (scoresPtr[box_idx] >= m_scoreThreshold) { // algin with ref sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); + } } fb.reserve(sorted_boxes.size()); if (sorted_boxes.size() > 0) { @@ -508,8 +529,9 @@ void MultiClassNms::nmsWithEta(const float* boxes, box_is_selected = false; break; } - if (currBox.score <= m_scoreThreshold) + if (currBox.score <= m_scoreThreshold) { break; + } } currBox.suppress_begin_index = fb.size(); @@ -550,10 +572,11 @@ const float* MultiClassNms::slice_class(const int batch_idx, const VectorDims& roisnumStrides, const bool shared) { if (shared) { - if (is_boxes) + if (is_boxes) { return dataPtr + batch_idx * dataStrides[0]; - else + } else { return dataPtr + batch_idx * dataStrides[0] + class_idx * dataStrides[1]; + } } // get M boxes of current class_idx : 1, M, 4 @@ -596,8 +619,9 @@ void MultiClassNms::nmsWithoutEta(const float* boxes, std::vector> sorted_boxes; int cur_numBoxes = shared ? 
m_numBoxes : roisnum[batch_idx]; for (int box_idx = 0; box_idx < cur_numBoxes; box_idx++) { - if (scoresPtr[box_idx] >= m_scoreThreshold) // align with ref + if (scoresPtr[box_idx] >= m_scoreThreshold) { // align with ref sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx)); + } } int io_selection_size = 0; @@ -643,8 +667,9 @@ void MultiClassNms::checkPrecision(const ov::element::Type prec, const std::vector& precList, const std::string& name, const std::string& type) { - if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) { THROW_CPU_NODE_ERR("has unsupported '", name, "' ", type, " precision: ", prec); + } } } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index df017fe052c343..829ea06a0bd1a5 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -174,23 +174,26 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k }; worker_tails(reg_rt_shape, tails_func); // hsum+store - if (!jcp_.normalize_variance && !isFloatCompatible(jcp_.src_prc)) + if (!jcp_.normalize_variance && !isFloatCompatible(jcp_.src_prc)) { uni_vcvtdq2ps(vmm_sum, vmm_sum); + } Vmm vmm_dst = jcp_.normalize_variance ? 
vmm_variance : vmm_sum; reduce_sum_store_vmm(vmm_dst.getIdx()); } else if (jcp_.layout == MVNLayoutType::mvn_by_channel) { - if (jcp_.across_channels) + if (jcp_.across_channels) { nspc_ac_ker(); - else + } else { nspc_pc_ker(); + } } else { block_ker(); } this->postamble(); - for (size_t i = 0; i < LOAD_EMITTERS_NUM; i++) + for (size_t i = 0; i < LOAD_EMITTERS_NUM; i++) { load_emitter[i]->emit_data(); + } prepare_table(); } @@ -259,8 +262,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k }; worker_tails(reg_work_amount, tails_func); - if (!jcp_.normalize_variance && !isFloatCompatible(jcp_.src_prc)) + if (!jcp_.normalize_variance && !isFloatCompatible(jcp_.src_prc)) { uni_vcvtdq2ps(vmm_sum, vmm_sum); + } Vmm vmm_dst = jcp_.normalize_variance ? vmm_variance : vmm_sum; reduce_sum_store_vmm(vmm_dst.getIdx()); } @@ -327,8 +331,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k int ur_base = 4; auto init = [&](int vmm_id) { uni_vpxor(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id)); - if (jcp_.normalize_variance) + if (jcp_.normalize_variance) { uni_vmovups(Vmm(ur_base + 8 + vmm_id), ptr[reg_mean + vmm_id * vlen]); + } }; auto load_src = [&](int vmm_id) { load_emitter[VECTOR]->emit_code({static_cast(reg_src_aux.getIdx())}, @@ -345,18 +350,20 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k uni_vsubps(Vmm(ur_base + vmm_id), Vmm(ur_base + vmm_id), Vmm(ur_base + 8 + vmm_id)); uni_vfmadd231ps(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + vmm_id), Vmm(ur_base + vmm_id)); } else { - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vpaddd(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + vmm_id)); - else + } else { uni_vaddps(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + vmm_id)); + } } }; auto store = [&](int vmm_id) { if (jcp_.normalize_variance) { 
uni_vmovups(ptr[reg_variance + vmm_id * vector_step * sizeof(float)], Vmm(ur_base + 4 + vmm_id)); } else { - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vcvtdq2ps(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id)); + } uni_vmovups(ptr[reg_sum + vmm_id * vector_step * sizeof(float)], Vmm(ur_base + 4 + vmm_id)); } }; @@ -485,10 +492,11 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k uni_vsubps(Vmm(ur_base + vmm_id), Vmm(ur_base + vmm_id), Vmm(ur_base + 8 + vmm_id)); uni_vfmadd231ps(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + vmm_id), Vmm(ur_base + vmm_id)); } else { - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vpaddd(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + vmm_id)); - else + } else { uni_vaddps(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + vmm_id)); + } } }; auto store_tails = [&](size_t step) { @@ -497,8 +505,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k uni_vmovups(ptr[reg_variance], Vmm(ur_base + 4 + vmm_id)); add(reg_variance, step * sizeof(float)); } else { - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vcvtdq2ps(Vmm(ur_base + 4 + vmm_id), Vmm(ur_base + 4 + vmm_id)); + } uni_vmovups(ptr[reg_sum], Vmm(ur_base + 4 + vmm_id)); add(reg_sum, step * sizeof(float)); } @@ -579,8 +588,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k uni_vmovups(ptr[reg_variance], vmm_variance); } else { if (!isFloatCompatible( - jcp_.src_prc)) // add with int for int-family data type, other compute go with float + jcp_.src_prc)) { // add with int for int-family data type, other compute go with float uni_vcvtdq2ps(vmm_sum, vmm_sum); + } if (!jcp_.across_channels) { uni_vmovups(vmm_val, ptr[reg_sum]); @@ -735,17 +745,19 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k if 
(jcp_.normalize_variance) { // all with float - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vcvtdq2ps(vmm_val, vmm_val); + } uni_vsubps(vmm_val, vmm_val, vmm_mean); uni_vfmadd231ps(vmm_variance, vmm_val, vmm_val); } else { // for sum, int execute prc for int-family data type - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vpaddd(vmm_sum, vmm_sum, vmm_val); - else + } else { uni_vaddps(vmm_sum, vmm_sum, vmm_val); + } } } @@ -826,8 +838,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k } } if (jcp_.normalize_variance) { - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vcvtdq2ps(vmm_val, vmm_val); + } uni_vsubps(vmm_val, vmm_val, vmm_mean); if (is_zero_pad) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); @@ -849,10 +862,11 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k } uni_vfmadd231ps(vmm_variance, vmm_val, vmm_val); } else { - if (!isFloatCompatible(jcp_.src_prc)) + if (!isFloatCompatible(jcp_.src_prc)) { uni_vpaddd(vmm_sum, vmm_sum, vmm_val); - else + } else { uni_vaddps(vmm_sum, vmm_sum, vmm_val); + } } } @@ -993,8 +1007,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator mov(reg_post_ops_data, ptr[reg_params + GET_OFF(post_op_data)]); mov(reg_src, ptr[reg_params + GET_OFF(src)]); mov(reg_mean, ptr[reg_params + GET_OFF(mean)]); - if (jcp_.normalize_variance) + if (jcp_.normalize_variance) { mov(reg_variance_inv, ptr[reg_params + GET_OFF(variance)]); + } mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); mov(reg_rt_shape, ptr[reg_params + GET_OFF(rt_shape_size)]); @@ -1007,12 +1022,14 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator if (jcp_.layout == MVNLayoutType::mvn_planar || jcp_.across_channels) { uni_vbroadcastss(vmm_mean, ptr[reg_mean]); - if 
(jcp_.normalize_variance) + if (jcp_.normalize_variance) { uni_vbroadcastss(vmm_variance_inv, ptr[reg_variance_inv]); + } } else { uni_vmovups(vmm_mean, ptr[reg_mean]); - if (jcp_.normalize_variance) + if (jcp_.normalize_variance) { uni_vmovups(vmm_variance_inv, ptr[reg_variance_inv]); + } } uni_vpxor(vmm_zero, vmm_zero, vmm_zero); @@ -1032,23 +1049,27 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator }; worker_mvn_tails(reg_rt_shape, tails_func); } else if (jcp_.layout == MVNLayoutType::mvn_by_channel) { - if (jcp_.across_channels) + if (jcp_.across_channels) { norm_nspc_ac_ker(); - else + } else { norm_nspc_pc_ker(); + } } else { norm_block_ker(); } this->postamble(); - for (size_t i = 0; i < EMITTERS_NUM; i++) + for (size_t i = 0; i < EMITTERS_NUM; i++) { load_emitter[i]->emit_data(); - for (size_t i = 0; i < EMITTERS_NUM; i++) + } + for (size_t i = 0; i < EMITTERS_NUM; i++) { store_emitter[i]->emit_data(); + } - for (auto& inj : eltwise_injectors) + for (auto& inj : eltwise_injectors) { inj->prepare_table(); + } } private: @@ -1604,8 +1625,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator worker_mvn_block(tile_size); add(reg_src, tile_size * jcp_.src_data_size); add(reg_dst, tile_size * jcp_.dst_data_size); - if (attr_.post_ops_.len() != 0) + if (attr_.post_ops_.len() != 0) { add(reg_oc_off, tile_size * sizeof(float)); + } }; worker_mvn_tails(reg_rt_shape, tails_func); @@ -1631,8 +1653,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator add(reg_src, src_stride); add(reg_dst, dst_stride); - if (jcp_.layout == MVNLayoutType::mvn_by_channel && attr_.post_ops_.len() != 0) + if (jcp_.layout == MVNLayoutType::mvn_by_channel && attr_.post_ops_.len() != 0) { add(reg_oc_off, vector_step * sizeof(float)); + } sub(reg_work_amount, step_sub); @@ -1648,8 +1671,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator {load_pool_gpr_idxs}); 
uni_vsubps(vmm_val, vmm_val, vmm_mean); - if (jcp_.normalize_variance) + if (jcp_.normalize_variance) { uni_vmulps(vmm_val, vmm_val, vmm_variance_inv); + } apply_post_ops(jcp_.dst_prc, vmm_val.getIdx(), jcp_.layout == MVNLayoutType::mvn_planar); @@ -1705,8 +1729,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator } uni_vsubps(vmm_val, vmm_val, vmm_mean); - if (jcp_.normalize_variance) + if (jcp_.normalize_variance) { uni_vmulps(vmm_val, vmm_val, vmm_variance_inv); + } apply_post_ops(jcp_.dst_prc, vmm_val.getIdx(), jcp_.layout == MVNLayoutType::mvn_planar); @@ -1836,8 +1861,9 @@ bool MVN::isSupportedOperation(const std::shared_ptr& op, std::s // 4D: axes: [1,2,3], [2,3] // 5D: axes: [1,2,3,4], [2,3,4] auto axesVal = axesOp->cast_vector(); - for (int& axe : axesVal) + for (int& axe : axesVal) { axe = axe < 0 ? axe + inDataRank : axe; + } std::sort(axesVal.begin(), axesVal.end()); if (inDataRank == 1) { if (axesVal.size() != 1 || axesVal[0] != 0) { @@ -1886,8 +1912,9 @@ MVN::MVN(const std::shared_ptr& op, const GraphContext::CPtr& context) mvnAttrs.initAcrossChannels_ = false; const auto& inDataShapeSize = getInputShapeAtPort(0).getRank(); - if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1) + if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1) { mvnAttrs.initAcrossChannels_ = true; + } } else if (auto mvnOp = ov::as_type_ptr(op)) { mvnAttrs.normalizeVariance_ = mvnOp->get_normalize_variance(); mvnAttrs.epsValue_ = mvnOp->get_eps(); @@ -1921,13 +1948,15 @@ static inline bool isUnaryEltwise(const NodePtr& node) { } void MVN::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(0); ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(0); - if (!hasHardwareSupport(outputPrecision)) + if 
(!hasHardwareSupport(outputPrecision)) { outputPrecision = ov::element::f32; + } if (!fusedWith.empty()) { outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); @@ -2038,8 +2067,9 @@ void MVN::initSupportedPrimitiveDescriptors() { } // planar - if (canBeInplace) + if (canBeInplace) { config.inConfs[0].inPlace(0); + } pushDesc(LayoutType::ncsp, impl_type); } @@ -2087,12 +2117,15 @@ MVN::MVNJitExecutor::MVNJitExecutor(const MVNAttrs& mvnAttrs, const dnnl::primit OPENVINO_THROW("Can't create jit MVN kernel"); } #endif // OPENVINO_ARCH_X86_64 - if (mvn_kernel) + if (mvn_kernel) { mvn_kernel->create_ker(); - if (mvn_mean_kernel) + } + if (mvn_mean_kernel) { mvn_mean_kernel->create_ker(); - if (mvn_variance_kernel) + } + if (mvn_variance_kernel) { mvn_variance_kernel->create_ker(); + } } void MVN::MVNJitExecutor::exec(const uint8_t* src_data, @@ -2123,12 +2156,15 @@ void MVN::MVNRefExecutor::exec(const uint8_t* src_data, void MVN::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory is undefined."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { OPENVINO_THROW("Input memory is undefined."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } const VectorDims in_dims = srcMemPtr->getStaticDims(); transformTo5DCase(in_dims); @@ -2340,10 +2376,11 @@ void MVN::MVNJitExecutor::mvn_pln(const uint8_t* src_data, }); float variance = 1.f; - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance /= sqrtf(variance_temp * C3inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance /= sqrtf(variance_temp * 
C3inv) + mvnAttrs.epsValue_; + } // mvn for one instance in batch parallel_for(C, [&](int c) { @@ -2403,10 +2440,11 @@ void MVN::MVNJitExecutor::mvn_pln(const uint8_t* src_data, arg.variance = static_cast(&variance); (*mvn_variance_kernel)(&arg); - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance = 1.f / sqrtf(variance * C2inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance = 1.f / (sqrtf(variance * C2inv) + mvnAttrs.epsValue_); + } // mvn for this channel (*mvn_kernel)(&arg); @@ -2463,10 +2501,11 @@ void MVN::MVNRefExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, co }); float variance = 1.f; - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance = 1.f / sqrtf(variance_temp * C3inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance = 1.f / (sqrtf(variance_temp * C3inv) + mvnAttrs.epsValue_); + } parallel_for(C, [&](int c) { size_t cc = cb + c * C2; @@ -2500,10 +2539,11 @@ void MVN::MVNRefExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, co variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean); } - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance = 1.f / sqrtf(variance * C2inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance = 1.f / (sqrtf(variance * C2inv) + mvnAttrs.epsValue_); + } // mvn for this channel for (size_t sp = 0lu; sp < C2; sp++) { @@ -2565,8 +2605,9 @@ void MVN::MVNJitExecutor::mvn_nspc(const uint8_t* src_data, } else if (2 == kernel_type) { arg.dst = dst_data + (b_offset + (start * C)) * dst_data_size; arg.mean = &mean_buffer[0]; - if (mvnAttrs.normalizeVariance_) + if (mvnAttrs.normalizeVariance_) { arg.variance = &variance_buffer[0]; + } arg.oc_off = 0; 
arg.post_op_data = post_ops_data_; } @@ -2604,32 +2645,37 @@ void MVN::MVNJitExecutor::mvn_nspc(const uint8_t* src_data, for (size_t i = 1; i < threads_num; i++) { variance_buffer[0] += variance_buffer[i]; } - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance_buffer[0] = 1.f / sqrtf(variance_buffer[0] * size_inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance_buffer[0] = 1.f / (sqrtf(variance_buffer[0] * size_inv) + mvnAttrs.epsValue_); + } } worker(true, 2); } else { // for per_channel float size_inv = 1.f / static_cast(D * H * W); worker(false, 0); for (size_t i = 1; i < threads_num; i++) { - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { mean_buffer[c] += mean_buffer[c + aux_buffer_size * i]; + } } - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { mean_buffer[c] *= size_inv; + } if (mvnAttrs.normalizeVariance_) { worker(false, 1); for (size_t i = 1; i < threads_num; i++) { - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; + } } for (size_t c = 0; c < C; c++) { - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + mvnAttrs.epsValue_); + } } } worker(false, 2); @@ -2696,8 +2742,9 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, return mean_internal; } auto mean_buffer_ptr = &mean_buffer[aux_buffer_size * thread_idx]; - for (size_t i = 0; i < blk_size; i++) + for (size_t i = 0; i < blk_size; i++) { mean_buffer_ptr[i] = 0.f; + } auto arg = jit_mvn_call_args(); arg.src = src_data + src_offset * src_data_size; @@ -2709,8 +2756,9 @@ void 
MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, (*mvn_mean_kernel)(&arg); // for W * blk size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - for (size_t i = 0; i < min_cb; i++) + for (size_t i = 0; i < min_cb; i++) { mean_internal += mean_buffer_ptr[i]; + } return mean_internal; }); float mean = mean_temp * C5inv; @@ -2724,8 +2772,9 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, float variance_internal = 0.0f; auto variance_buffer_ptr = &variance_buffer[aux_buffer_size * static_cast(parallel_get_thread_num())]; - for (size_t i = 0; i < blk_size; i++) + for (size_t i = 0; i < blk_size; i++) { variance_buffer_ptr[i] = 0.f; + } auto arg = jit_mvn_call_args(); arg.src = src_data + src_offset * src_data_size; @@ -2738,16 +2787,18 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, (*mvn_variance_kernel)(&arg); size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - for (size_t i = 0; i < min_cb; i++) + for (size_t i = 0; i < min_cb; i++) { variance_internal += variance_buffer_ptr[i]; + } return variance_internal; }); float variance = 1.f; - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance /= sqrtf(variance_temp * C5inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance /= sqrtf(variance_temp * C5inv) + mvnAttrs.epsValue_; + } // mvn for one instance in batch parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) { @@ -2780,8 +2831,9 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, } } else { // for per_channel float size_inv = 1.f / static_cast(D * H * W); - for (size_t i = 0; i < mean_buffer.size(); i++) + for (size_t i = 0; i < mean_buffer.size(); i++) { mean_buffer[i] = 0.f; + } // one thread for one C*W size(the same H) to get C size result for the same H, added to last group result // keep the compute order the same as planar @@ -2806,15 +2858,18 @@ void MVN::MVNJitExecutor::mvn_blk(const 
uint8_t* src_data, }); for (size_t i = 1; i < threads_num; i++) { - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { mean_buffer[c] += mean_buffer[c + aux_buffer_size * i]; + } } - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { mean_buffer[c] *= size_inv; + } if (mvnAttrs.normalizeVariance_) { - for (size_t i = 0; i < variance_buffer.size(); i++) + for (size_t i = 0; i < variance_buffer.size(); i++) { variance_buffer[i] = 0.f; + } auto dh_loop = [&](size_t thr_idx, size_t d, size_t h) { for (size_t cb = 0; cb < CB; cb++) { @@ -2839,14 +2894,16 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, }); for (size_t i = 1; i < threads_num; i++) { - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; + } } for (size_t c = 0; c < C; c++) { - if (mvnAttrs.epsMode_ == INSIDE_SQRT) + if (mvnAttrs.epsMode_ == INSIDE_SQRT) { variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + mvnAttrs.epsValue_); - else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) + } else if (mvnAttrs.epsMode_ == OUTSIDE_SQRT) { variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + mvnAttrs.epsValue_); + } } parallel_for2d(D, H, [&](size_t d, size_t h) { diff --git a/src/plugins/intel_cpu/src/nodes/ngram.cpp b/src/plugins/intel_cpu/src/nodes/ngram.cpp index 4a4c41e0a45d94..9e8d647a694f0f 100644 --- a/src/plugins/intel_cpu/src/nodes/ngram.cpp +++ b/src/plugins/intel_cpu/src/nodes/ngram.cpp @@ -52,8 +52,9 @@ Ngram::Ngram(const std::shared_ptr& op, const GraphContext::CPtr& cont } void Ngram::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } idcesPrecision = getOriginalInputPrecisionAtPort(1); if (idcesPrecision != ov::element::i32 && idcesPrecision != ov::element::i64) { @@ -134,8 +135,9 @@ void Ngram::execute(const dnnl::stream& strm) { srcData + srcBatchBias + srcWindowBias, 
dataSize * sizeof(float)); dstWindowBias += dataSize + curRightPad; - if (curLeftPad == 0) + if (curLeftPad == 0) { srcWindowBias += windowStride; + } } }); } diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index 8ad10c25ebf1a3..a34538fc24a9a2 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -114,8 +114,9 @@ NonMaxSuppression::NonMaxSuppression(const std::shared_ptr& op, const } void NonMaxSuppression::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto inputs_num = inputShapes.size(); if (inputs_num > NMS_MAX_OUTPUT_BOXES_PER_CLASS) { @@ -357,8 +358,9 @@ void NonMaxSuppression::nmsWithSoftSigma(const float* boxes, // if is_soft_suppressed_by_iou is false, apply for all iou, including iou>iou_threshold, soft suppressed when score // < score_threshold if is_soft_suppressed_by_iou is true, hard suppressed by iou_threshold, then soft suppress auto coeff = [&](float iou) { - if (m_is_soft_suppressed_by_iou && iou > m_iou_threshold) + if (m_is_soft_suppressed_by_iou && iou > m_iou_threshold) { return 0.0f; + } return std::exp(m_scale * iou * iou); }; @@ -370,8 +372,9 @@ void NonMaxSuppression::nmsWithSoftSigma(const float* boxes, std::priority_queue, decltype(less)> sorted_boxes( less); // score, box_id, suppress_begin_index for (int box_idx = 0; box_idx < static_cast(m_boxes_num); box_idx++) { - if (scoresPtr[box_idx] > m_score_threshold) + if (scoresPtr[box_idx] > m_score_threshold) { sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); + } } size_t sorted_boxes_size = sorted_boxes.size(); size_t maxSeletedBoxNum = std::min(sorted_boxes_size, m_output_boxes_per_class); @@ -936,8 +939,9 @@ float NonMaxSuppression::intersectionOverUnion(const float* boxesI, const float* float areaI = (ymaxI - 
yminI) * (xmaxI - xminI); float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ); - if (areaI <= 0.f || areaJ <= 0.f) + if (areaI <= 0.f || areaJ <= 0.f) { return 0.f; + } float intersection_area = (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) * (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f); @@ -945,18 +949,23 @@ float NonMaxSuppression::intersectionOverUnion(const float* boxesI, const float* } void NonMaxSuppression::check1DInput(const Shape& shape, const std::string& name, const size_t port) { - if (shape.getRank() != 0 && shape.getRank() != 1) + if (shape.getRank() != 0 && shape.getRank() != 1) { THROW_CPU_NODE_ERR("has unsupported '", name, "' input rank: ", shape.getRank()); - if (shape.getRank() == 1) - if (shape.getDims()[0] != 1) + } + if (shape.getRank() == 1) { + if (shape.getDims()[0] != 1) { THROW_CPU_NODE_ERR("has unsupported '", name, "' input 1st dimension size: ", dim2str(shape.getDims()[0])); + } + } } void NonMaxSuppression::checkOutput(const Shape& shape, const std::string& name, const size_t port) { - if (shape.getRank() != 2) + if (shape.getRank() != 2) { THROW_CPU_NODE_ERR("has unsupported '", name, "' output rank: ", shape.getRank()); - if (shape.getDims()[1] != 3) + } + if (shape.getDims()[1] != 3) { THROW_CPU_NODE_ERR("has unsupported '", name, "' output 2nd dimension size: ", dim2str(shape.getDims()[1])); + } } bool NonMaxSuppression::isExecutable() const { diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp index 5e9a09d13a9dcf..f68b3a0c2cf92e 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp @@ -43,15 +43,18 @@ NonZero::NonZero(const std::shared_ptr& op, const GraphContext::CPtr& } void NonZero::getSupportedDescriptors() { - if (getParentEdges().size() != 1) + if (getParentEdges().size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if 
(!getChildEdges().size()) + } + if (!getChildEdges().size()) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } } void NonZero::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& inPrc = getOriginalInputPrecisionAtPort(0); if (!one_of(inPrc, @@ -88,8 +91,9 @@ std::vector NonZero::getNonZeroElementsCount(const T* src, const Shape& } default: { threadsCount = parallel_get_max_threads(); - if (inSize < static_cast(blockSize * threadsCount)) + if (inSize < static_cast(blockSize * threadsCount)) { threadsCount = 1; + } counts.resize(threadsCount); parallel_nt(threadsCount, [&](int ithr, int nthr) { @@ -159,8 +163,9 @@ void NonZero::executeSpecified() { redefineOutputMemory({newDims}); } int* dst = dstMemPtr->getDataAs(); - if (totalNonZeroCount == 0) + if (totalNonZeroCount == 0) { return; + } std::vector srcDims(inRank); std::transform(inShape.getDims().begin(), inShape.getDims().end(), srcDims.begin(), [](size_t x) { diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 13322254ab4ee1..c82a1bccebe8e7 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -200,8 +200,9 @@ struct jit_uni_normalize_modulo_kernel_f32 : public jit_uni_normalize_modulo_ker default: assert(!"unknown dst_dt"); } - if (!isFloatCompatible(src_dt)) + if (!isFloatCompatible(src_dt)) { uni_vcvtdq2ps(vmm_src, vmm_src); + } } }; @@ -242,8 +243,9 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji } } - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa)); + } this->preamble(); @@ -255,8 +257,9 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji mov(reg_post_ops_data, ptr[reg_params + GET_OFF(post_op_data)]); 
mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]); } - if (isa == avx512_core) + if (isa == avx512_core) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + } if (jcp_.is_nchw) { normalize_nchw(); @@ -268,10 +271,12 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji this->postamble(); - if (uni_vcvtneps2bf16) + if (uni_vcvtneps2bf16) { uni_vcvtneps2bf16->emit_data(); - for (auto& inj : eltwise_injectors) + } + for (auto& inj : eltwise_injectors) { inj->prepare_table(); + } } private: @@ -546,8 +551,9 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji default: assert(!"unknown dst_dt"); } - if (!isFloatCompatible(src_dt)) + if (!isFloatCompatible(src_dt)) { uni_vcvtdq2ps(vmm_src, vmm_src); + } } inline void load_scalar(Xmm xmm_src, const Xbyak::Address& op, memory::data_type src_dt) { @@ -594,13 +600,15 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji vpmovusdb(op, vmm_dst); } else { uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vmovq(op, xmm_dst); - else + } else { movd(op, xmm_dst); + } } } else if (dst_dt == memory::data_type::s8) { uni_vcvtps2dq(vmm_dst, vmm_dst); @@ -608,13 +616,15 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji vpmovsdb(op, vmm_dst); } else { uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vmovq(op, xmm_dst); - else + } else { movd(op, xmm_dst); + } } } } @@ -661,14 +671,16 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji for (int i = 0; i < p.len(); i++) { auto& post_op = p.entry_[i]; if 
(post_op.is_eltwise()) { - if (eltwise_injectors.size() <= eltwise_inj_idx || eltwise_injectors[eltwise_inj_idx] == nullptr) + if (eltwise_injectors.size() <= eltwise_inj_idx || eltwise_injectors[eltwise_inj_idx] == nullptr) { assert(!"Invalid eltwise injectors."); + } eltwise_injectors[eltwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1); eltwise_inj_idx++; } else if (post_op.is_depthwise()) { if (depthwise_injectors.size() <= depthwise_inj_idx || - depthwise_injectors[depthwise_inj_idx] == nullptr) + depthwise_injectors[depthwise_inj_idx] == nullptr) { assert(!"Invalid depthwise injectors."); + } mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]); add(reg_d_weights, reg_oc_off); @@ -683,8 +695,9 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji depthwise_inj_idx++; } else if (post_op.is_quantization()) { if (quantization_injectors.size() <= quantization_inj_idx || - quantization_injectors[quantization_inj_idx] == nullptr) + quantization_injectors[quantization_inj_idx] == nullptr) { assert(!"Invalid quantization injectors."); + } bool do_dequantization = post_op.quantization.alg == alg_kind::quantization_quantize_dequantize; bool do_rounding = do_dequantization || isFloatCompatible(dst_dt) || i != p.len() - 1; @@ -748,8 +761,9 @@ bool NormalizeL2::isSupportedOperation(const std::shared_ptr& op auto sortAxes = axes; std::sort(sortAxes.begin(), sortAxes.end()); for (size_t i = 0; i < sortAxes.size(); i++) { - if (sortAxes[i] != i + 1) + if (sortAxes[i] != i + 1) { return false; + } } return true; } @@ -780,8 +794,9 @@ NormalizeL2::NormalizeL2(const std::shared_ptr& op, const GraphContext OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 2 || outputShapes.size() != 1) + if (inputShapes.size() != 2 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges"); + } if (getInputShapeAtPort(DATA).getRank() > 4 || 
getInputShapeAtPort(DATA).getRank() < 2) { THROW_CPU_NODE_ERR("has invalid input shape. Normalize supports from 2D to 4D blobs."); @@ -797,8 +812,9 @@ NormalizeL2::NormalizeL2(const std::shared_ptr& op, const GraphContext } void NormalizeL2::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(DATA); ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(DATA); @@ -808,10 +824,11 @@ void NormalizeL2::initSupportedPrimitiveDescriptors() { } if (inputPrecision == ov::element::bf16 || outputPrecision == ov::element::bf16) { - if (!mayiuse(avx512_core)) + if (!mayiuse(avx512_core)) { inputPrecision = outputPrecision = ov::element::f32; - else + } else { inputPrecision = outputPrecision = ov::element::bf16; + } } if (one_of(ov::element::f16, inputPrecision, outputPrecision) && mayiuse(cpu::x64::sse41)) { @@ -873,8 +890,9 @@ void NormalizeL2::initSupportedPrimitiveDescriptors() { } } } - if (canBeInplace) + if (canBeInplace) { config.inConfs[0].inPlace(0); + } pushDesc(LayoutType::ncsp, impl_type); } @@ -912,12 +930,15 @@ void NormalizeL2::setPostOps(dnnl::primitive_attr& kernel_attrs, const VectorDim void NormalizeL2::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(DATA); auto srcMemPtr = getSrcMemoryAtPort(DATA); - if (!dstMemPtr) + if (!dstMemPtr) { THROW_CPU_NODE_ERR("can't get destination memory"); - if (!srcMemPtr) + } + if (!srcMemPtr) { THROW_CPU_NODE_ERR("can't get input memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has nullable preferable primitive descriptor"); + } if (!attrs.cornerCase) { if (srcMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { @@ -934,8 +955,9 @@ void NormalizeL2::createPrimitive() { } if (inputShapesDefined()) { - if (needPrepareParams()) + if (needPrepareParams()) { 
prepareParams(); + } updateLastInputDims(); } } @@ -970,8 +992,9 @@ void NormalizeL2::executeDynamicImpl(const dnnl::stream& strm) { } void NormalizeL2::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("doesn't have a compiled executor."); + } const uint8_t* src_ptr = getSrcDataAtPortAs(DATA); uint8_t* dst_ptr = getDstDataAtPortAs(DATA); @@ -1048,11 +1071,13 @@ class NormalizeL2::NormalizeL2JitExecutor : public NormalizeL2::NormalizeL2Execu OPENVINO_THROW("Jit Executor for NormalizeL2 cannot create kernels!"); } - if (normalize_kernel) + if (normalize_kernel) { normalize_kernel->create_ker(); + } - if (normalize_modulo_kernel) + if (normalize_modulo_kernel) { normalize_modulo_kernel->create_ker(); + } } void exec(const uint8_t* src_ptr, uint8_t* dst_ptr, const void** post_ops_data) override { @@ -1549,16 +1574,17 @@ std::shared_ptr NormalizeL2::NormalizeL2Execut const NormalizeL2Attrs& attrs, const dnnl::primitive_attr& kernel_attrs, const VectorDims& dims) { - if (attrs.cornerCase) + if (attrs.cornerCase) { return std::make_shared>(dims); #if defined(OPENVINO_ARCH_X86_64) - else if (mayiuse(cpu::x64::sse41)) + } else if (mayiuse(cpu::x64::sse41)) { return std::make_shared>(attrs, kernel_attrs, dims); #endif - else if (attrs.layout == LayoutType::ncsp) + } else if (attrs.layout == LayoutType::ncsp) { return std::make_shared>(attrs, kernel_attrs, dims); - else + } else { OPENVINO_THROW("'NormalizeL2' cannot create Executor"); + } } bool NormalizeL2::created() const { diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.cpp b/src/plugins/intel_cpu/src/nodes/one_hot.cpp index efdcbc9f0d31db..f236053985339c 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.cpp +++ b/src/plugins/intel_cpu/src/nodes/one_hot.cpp @@ -73,8 +73,9 @@ OneHot::OneHot(const std::shared_ptr& op, const GraphContext::CPtr& co } if (!(((1 + srcDims.size()) == dstDims.size()) || - (depthNode && (srcDims.size() == 1 && dstDims.size() == 1 && dstDims[0] == 
depth && srcDims[0] == 1)))) + (depthNode && (srcDims.size() == 1 && dstDims.size() == 1 && dstDims[0] == depth && srcDims[0] == 1)))) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions!"); + } } bool OneHot::needShapeInfer() const { @@ -88,8 +89,9 @@ bool OneHot::needShapeInfer() const { } void OneHot::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } // check a precision of the input tensor auto input_precision = getOriginalInputPrecisionAtPort(INDICES_ID); @@ -141,8 +143,9 @@ void OneHot::execute(const dnnl::stream& strm) { auto input_dims = getParentEdgeAt(0)->getMemory().getStaticDims(); std::size_t actual_axis = (axis == -1) ? input_dims.size() : axis; - for (size_t i = 0; i < actual_axis; ++i) + for (size_t i = 0; i < actual_axis; ++i) { prefix_size *= input_dims[i]; + } std::size_t suffix_size = getParentEdgeAt(0)->getMemory().getShape().getElementsCount() / prefix_size; diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index 4d651e2d4f87a1..530fb84f15a3f3 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -44,15 +44,18 @@ Pad::Pad(const std::shared_ptr& op, const GraphContext::CPtr& context) if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 3 && inputShapes.size() != 4) + if (inputShapes.size() != 3 && inputShapes.size() != 4) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (outputShapes.size() != 1) + } + if (outputShapes.size() != 1) { THROW_CPU_NODE_ERR("Incorrect number of output edges"); + } const size_t srcDimsRank = inputShapes[DATA_ID].getRank(); const size_t dstDimsRank = outputShapes[DATA_ID].getRank(); - if (srcDimsRank != dstDimsRank) + if (srcDimsRank != dstDimsRank) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions!"); + } auto 
pad = ov::as_type(op.get()); if (!pad) { @@ -63,8 +66,9 @@ Pad::Pad(const std::shared_ptr& op, const GraphContext::CPtr& context) !ov::is_type(op->get_input_node_shared_ptr(PADS_END_ID)); auto fillingInParameters = [&](VectorIdxs& parameter, const size_t type) { - if (type < PADS_BEGIN_ID) + if (type < PADS_BEGIN_ID) { return; + } const auto constNode = ov::as_type_ptr(op->get_input_node_shared_ptr(type)); if (constNode) { @@ -72,8 +76,9 @@ Pad::Pad(const std::shared_ptr& op, const GraphContext::CPtr& context) for (const auto& value : pad_data) { parameter.push_back(value); } - if (parameter.size() != srcDimsRank) + if (parameter.size() != srcDimsRank) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions!"); + } } }; @@ -86,8 +91,9 @@ Pad::Pad(const std::shared_ptr& op, const GraphContext::CPtr& context) attrs.padMode = CONSTANT; if (isPadValueSpecified && op->get_input_node_shared_ptr(PAD_VALUE_ID)->get_type_info() == ov::op::v0::Constant::get_type_info_static()) { - if (!ov::is_scalar(pad->get_input_shape(PAD_VALUE_ID))) + if (!ov::is_scalar(pad->get_input_shape(PAD_VALUE_ID))) { THROW_CPU_NODE_ERR("has non scalar 'pad_value' input"); + } attrs.padValue = ov::as_type_ptr(pad->get_input_node_shared_ptr(PAD_VALUE_ID)) ->cast_vector()[0]; attrs.constPadValue = true; @@ -106,14 +112,16 @@ Pad::Pad(const std::shared_ptr& op, const GraphContext::CPtr& context) void Pad::getSupportedDescriptors() {} void Pad::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector supportedPrecisions = {ov::element::f32, ov::element::i32, ov::element::bf16, ov::element::f16, ov::element::i8, ov::element::u8}; ov::element::Type precision = getOriginalInputPrecisionAtPort(DATA_ID); - if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precision) == supportedPrecisions.end()) + if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precision) 
== supportedPrecisions.end()) { precision = precision.is_real() ? ov::element::f32 : ov::element::i32; + } const auto& inputDataShape = getInputShapeAtPort(DATA_ID); const size_t numOfDims = inputDataShape.getRank(); @@ -130,17 +138,19 @@ void Pad::initSupportedPrimitiveDescriptors() { creatorsMap.at(LayoutType::ncsp)->createSharedDesc(ov::element::i32, getInputShapeAtPort(PADS_BEGIN_ID))); config.inConfs[2].setMemDesc( creatorsMap.at(LayoutType::ncsp)->createSharedDesc(ov::element::i32, getInputShapeAtPort(PADS_END_ID))); - if (isPadValueSpecified) + if (isPadValueSpecified) { config.inConfs[3].setMemDesc(creatorsMap.at(LayoutType::ncsp) ->createSharedDesc(ov::element::f32, getInputShapeAtPort(PAD_VALUE_ID))); + } config.outConfs[0].setMemDesc( creatorsMap.at(memoryFormat)->createSharedDesc(precision, getOutputShapeAtPort(DATA_ID))); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); }; - if (numOfDims == 4 || numOfDims == 5) + if (numOfDims == 4 || numOfDims == 5) { pushSupportedPrimitiveDescriptor(LayoutType::nspc); + } pushSupportedPrimitiveDescriptor(LayoutType::ncsp); @@ -154,10 +164,12 @@ void Pad::initSupportedPrimitiveDescriptors() { if (numOfDims == 4 || numOfDims == 5) { if (!shapeHasDataDependency) { - if (canUseBlocked(8)) + if (canUseBlocked(8)) { pushSupportedPrimitiveDescriptor(LayoutType::nCsp8c); - if (canUseBlocked(16)) + } + if (canUseBlocked(16)) { pushSupportedPrimitiveDescriptor(LayoutType::nCsp16c); + } } } } @@ -221,10 +233,12 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, params.attrs = attrs; auto& srcMemPtr = srcMemory[DATA_ID]; auto& dstMemPtr = dstMemory[DATA_ID]; - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Pad executor has undefined source memory."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { OPENVINO_THROW("Pad executor has undefined destination memory."); + } const auto 
srcBlockMemDesc = srcMemPtr->getDescWithType(); const auto dstBlockMemDesc = dstMemPtr->getDescWithType(); const auto& srcDims = srcBlockMemDesc->getBlockDims(); @@ -243,12 +257,15 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, } }; // if pad begin/end/value dynamic - if (params.attrs.padsBegin.empty()) + if (params.attrs.padsBegin.empty()) { fillingInParameters(params.attrs.padsBegin, PADS_BEGIN_ID, srcDims.size(), 0); - if (params.attrs.padsEnd.empty()) + } + if (params.attrs.padsEnd.empty()) { fillingInParameters(params.attrs.padsEnd, PADS_END_ID, srcDims.size(), 0); - if (!params.attrs.constPadValue) + } + if (!params.attrs.constPadValue) { params.attrs.padValue = srcMemory[PAD_VALUE_ID]->getDataAs()[0]; + } // pads are constant, so we can calculate new collapsing pads for first target dimensions and use it for the next // dimensions to avoid permanent identical pad calculations const size_t blockSize = srcMemPtr->getDesc().hasLayoutType(LayoutType::nCsp16c) @@ -343,14 +360,16 @@ void Pad::PadExecutor::workPartition() { } params.srcODims.clear(); - for (size_t i = 0; i < params.srcDims.size(); ++i) + for (size_t i = 0; i < params.srcDims.size(); ++i) { params.srcODims.push_back(params.attrs.padsBegin[i] + params.srcDims[i]); + } params.srcDimsForReflectOrSymmetric.clear(); if (params.attrs.padMode == REFLECT || params.attrs.padMode == SYMMETRIC) { int shift = params.attrs.padMode == SYMMETRIC ? 
1 : 0; - for (size_t i = 0; i < params.srcDims.size(); ++i) + for (size_t i = 0; i < params.srcDims.size(); ++i) { params.srcDimsForReflectOrSymmetric.push_back(params.srcDims[i] + params.srcODims[i] - 2 + shift); + } } } @@ -388,8 +407,9 @@ void Pad::PadExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemP } void Pad::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("has not compiled executor."); + } execPtr->exec(getSrcMemoryAtPort(0), getDstMemoryAtPort(0)); } @@ -409,10 +429,11 @@ static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims& static inline void parallel_step(size_t nDims, const VectorDims& dims, std::vector& indexes) { for (int j = nDims - 1; j >= 0; --j) { ++indexes[j]; - if (static_cast(indexes[j]) < dims[j]) + if (static_cast(indexes[j]) < dims[j]) { break; - else + } else { indexes[j] = 0; + } } } @@ -462,8 +483,9 @@ void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const Memor for (size_t iwork = start; iwork < end; ++iwork, dstIdx += params.lastDstDim) { size_t j = 0; for (; j < params.nDimsForWork; ++j) { - if (indexes[j] < params.attrs.padsBegin[j] || static_cast(indexes[j]) >= params.srcODims[j]) + if (indexes[j] < params.attrs.padsBegin[j] || static_cast(indexes[j]) >= params.srcODims[j]) { break; + } } if (j != params.nDimsForWork) { @@ -473,8 +495,9 @@ void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const Memor } size_t srcIdx = 0; - for (size_t idx = 0; idx < params.nDimsForWork; ++idx) + for (size_t idx = 0; idx < params.nDimsForWork; ++idx) { srcIdx += (indexes[idx] - params.attrs.padsBegin[idx]) * params.srcStrides[idx]; + } std::fill_n(&dstData[dstIdx], params.innerBeginShift, value); cpu_memcpy(&dstData[dstIdx + params.innerBeginShift], @@ -504,8 +527,9 @@ void Pad::PadExecutor::padConstantZero(const MemoryPtr& srcMemPtr, const MemoryP for (size_t iwork = start; iwork < end; ++iwork, dstIdx += params.lastDstDim) { 
size_t j = 0; for (; j < params.nDimsForWork; ++j) { - if (indexes[j] < params.attrs.padsBegin[j] || static_cast(indexes[j]) >= params.srcODims[j]) + if (indexes[j] < params.attrs.padsBegin[j] || static_cast(indexes[j]) >= params.srcODims[j]) { break; + } } if (j != params.nDimsForWork) { @@ -515,8 +539,9 @@ void Pad::PadExecutor::padConstantZero(const MemoryPtr& srcMemPtr, const MemoryP } size_t srcIdx = 0; - for (size_t idx = 0; idx < params.nDimsForWork; ++idx) + for (size_t idx = 0; idx < params.nDimsForWork; ++idx) { srcIdx += (indexes[idx] - params.attrs.padsBegin[idx]) * params.srcStrides[idx]; + } srcIdx *= params.dataSize; memset(&dstData[dstIdx], 0, params.innerBeginShift); @@ -556,17 +581,19 @@ void Pad::PadExecutor::padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstM } srcIdx *= params.dataSize; - for (size_t i = 0; i < params.innerBeginPadCount; ++i) + for (size_t i = 0; i < params.innerBeginPadCount; ++i) { cpu_memcpy(&dstData[dstIdx + i * params.shift], &srcData[srcIdx], params.shift); + } cpu_memcpy(&dstData[dstIdx + params.innerBeginShift], &srcData[srcIdx + params.innerSrcShift], params.innerCopySize); - for (size_t i = 0; i < params.innerEndPadCount; ++i) + for (size_t i = 0; i < params.innerEndPadCount; ++i) { cpu_memcpy(&dstData[dstIdx + params.innerBeginShift + params.innerCopySize + i * params.shift], &srcData[srcIdx + (params.srcDims[params.nDimsForWork] - 1) * params.shift], params.shift); + } parallel_step(params.nDimsForWork, params.dstDims, indexes); } @@ -605,19 +632,21 @@ void Pad::PadExecutor::padReflectOrSymmetric(const MemoryPtr& srcMemPtr, } srcIdx *= params.dataSize; - for (size_t i = 0; i < params.innerBeginPadCount; ++i) + for (size_t i = 0; i < params.innerBeginPadCount; ++i) { cpu_memcpy(&dstData[dstIdx + i * params.shift], &srcData[srcIdx + (params.attrs.padsBegin[params.nDimsForWork] - shift - i) * params.shift], params.shift); + } cpu_memcpy(&dstData[dstIdx + params.innerBeginShift], &srcData[srcIdx + 
params.innerSrcShift], params.innerCopySize); - for (size_t i = 0; i < params.innerEndPadCount; ++i) + for (size_t i = 0; i < params.innerEndPadCount; ++i) { cpu_memcpy(&dstData[dstIdx + (params.srcODims[params.nDimsForWork] + i) * params.shift], &srcData[srcIdx + endSrcShift - i * params.shift], params.shift); + } parallel_step(params.nDimsForWork, params.dstDims, indexes); } @@ -625,8 +654,9 @@ void Pad::PadExecutor::padReflectOrSymmetric(const MemoryPtr& srcMemPtr, } inline void Pad::PadExecutor::getDstIdx(const VectorIdxs& indexes, size_t& dstIdx) const { - for (size_t i = 0; i < params.nDimsForWork; ++i) + for (size_t i = 0; i < params.nDimsForWork; ++i) { dstIdx += indexes[i] * params.dstStrides[i]; + } } bool Pad::created() const { diff --git a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp index 484e2af6d96b19..61277dd4dfc206 100644 --- a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp @@ -63,8 +63,9 @@ PagedAttention::PagedAttention(const std::shared_ptr& op, const GraphC } void PagedAttention::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto rtPrecision = getRuntimePrecision(); NodeConfig config; @@ -208,8 +209,9 @@ void PagedAttention::execute(const dnnl::stream& strm) { size_t len = 0; const auto& pastLensDims = inputs[5]->getStaticDims(); auto pastLens = inputs[5]->getDataAs(); - for (size_t i = 0; i < pastLensDims[0]; i++) + for (size_t i = 0; i < pastLensDims[0]; i++) { len += pastLens[i]; + } len += outDims[0]; VectorDims scoreDims{len}; redefineOutputMemory({outDims, scoreDims}); @@ -218,8 +220,9 @@ void PagedAttention::execute(const dnnl::stream& strm) { } outputs[0] = getDstMemoryAtPort(0); - if (m_hasScore) + if (m_hasScore) { outputs[1] = getDstMemoryAtPort(1); + } m_executor->execute(inputs, outputs); } diff --git 
a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 7b1bb1653404bc..28ca59bc6bc9e2 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -215,27 +215,28 @@ Pooling::Pooling(const std::shared_ptr& op, const GraphContext::CPtr& } std::vector Pooling::getAvailableFormatsForDims(const Shape& dims) const { - if (dims.getRank() == 0) + if (dims.getRank() == 0) { return {memory::format_tag::x}; - else if (dims.getRank() == 1) + } else if (dims.getRank() == 1) { return {memory::format_tag::x}; - else if (dims.getRank() == 2) + } else if (dims.getRank() == 2) { return {memory::format_tag::nc}; - else if (dims.getRank() == 3) + } else if (dims.getRank() == 3) { return {memory::format_tag::nCw8c, memory::format_tag::nCw16c, memory::format_tag::nwc, memory::format_tag::ncw}; - else if (dims.getRank() == 4) + } else if (dims.getRank() == 4) { return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.getRank() == 5) + } else if (dims.getRank() == 5) { return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; + } return {memory::format_tag::any}; } @@ -260,13 +261,16 @@ void Pooling::initEffectiveAttributes(const Shape& inShape, const Shape& outShap } void Pooling::getSupportedDescriptors() { - if (!descs.empty()) + if (!descs.empty()) { return; + } - if (getParentEdges().size() != 1) + if (getParentEdges().size() != 1) { OPENVINO_THROW("Incorrect number of input edges for layer ", getName()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { OPENVINO_THROW("Incorrect number of output edges for layer ", getName()); + } ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(0); ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(0); @@ -323,8 +327,9 @@ void Pooling::getSupportedDescriptors() { 
"getSupportedDescriptors()"); } #endif - if (useACL) + if (useACL) { return; + } // WA: LPT transformation has WA which allows average pooling has I8/U8 output precision instead of FP32, // so we explicitly set output precision as FP32 @@ -347,15 +352,17 @@ void Pooling::getSupportedDescriptors() { auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(inputPrecision); auto outputDataType = DnnlExtensionUtils::ElementTypeToDataType(outputPrecision); - if ((inputRank < 3) || (inputRank > 5)) + if ((inputRank < 3) || (inputRank > 5)) { OPENVINO_THROW("Pooling layer. Unsupported mode. Only 3D, 4D and 5D blobs are supported as input."); + } initEffectiveAttributes(inShape, MemoryDescUtils::makeDummyShape(childShape)); if (inputPrecision == ov::element::i8 || inputPrecision == ov::element::u8) { // We have to extend i8i8_pooling_fwd_t from oneDNN to support BF16 output data type - if (one_of(outputDataType, memory::data_type::bf16, memory::data_type::f16)) + if (one_of(outputDataType, memory::data_type::bf16, memory::data_type::f16)) { outputDataType = memory::data_type::f32; + } // i8 layers supports only ndhwc and nhwc layouts const auto in_candidate = std::make_shared( parentShape, @@ -397,8 +404,9 @@ void Pooling::getSupportedDescriptors() { void Pooling::prepareParams() { auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Pooling node with name '", getName(), "' did not set preferable primitive descriptor"); + } AttrPtr attr; if (isDynamicNode()) { @@ -418,10 +426,12 @@ void Pooling::prepareParams() { if (useACL) { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination memory is undefined."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { OPENVINO_THROW("Input memory is undefined."); + } 
std::vector srcMemoryDescs; for (size_t i = 0; i < getOriginalInputsNumber(); i++) { @@ -474,8 +484,9 @@ void Pooling::prepareParams() { auto first_desc = dnnl::pooling_forward::primitive_desc(prim_desc.get()); const bool found = DnnlExtensionUtils::find_implementation(prim_desc, key.implType); - if (found) + if (found) { return std::make_shared(prim_desc); + } // use the first available return std::make_shared(first_desc); @@ -547,10 +558,11 @@ dnnl::algorithm Pooling::getPoolingAlgorithm() const { break; } } - if (!poolingAttrs.exclude_pad && (not_zero_l || not_zero_r)) + if (!poolingAttrs.exclude_pad && (not_zero_l || not_zero_r)) { return dnnl::algorithm::pooling_avg_include_padding; - else + } else { return dnnl::algorithm::pooling_avg_exclude_padding; + } } else if (algorithm == Algorithm::PoolingMax) { return dnnl::algorithm::pooling_max; } else { @@ -597,13 +609,15 @@ void Pooling::createDescriptor(const std::vector& inputDesc, auto desc = createDescriptorInternal(in_candidate, out_candidate, getPoolingAlgorithm()); - if (desc) + if (desc) { descs.emplace_back(desc); + } } void Pooling::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } if (useACL) { auto& creatorsMap = BlockedDescCreator::getCommonCreators(); @@ -708,14 +722,16 @@ void Pooling::initSupportedPrimitiveDescriptors() { // fallback. 
if none of the primitive types is present in the priority list just add first implementation // @todo this fallback is not necessary if primitive priority list is filled correctly - if (supportedPrimitiveDescriptors.empty()) + if (supportedPrimitiveDescriptors.empty()) { addSupportedPrimitiveDescriptor(first_desc); + } } } void Pooling::initDescriptor(const NodeConfig& config) { - if (useACL) + if (useACL) { return; + } Node::initDescriptor(config); } diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.cpp b/src/plugins/intel_cpu/src/nodes/priorbox.cpp index 3bf6a47797e044..8c3ecbcd53dd02 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox.cpp @@ -124,8 +124,9 @@ bool PriorBox::needPrepareParams() const { } void PriorBox::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}}, {{LayoutType::ncsp, ov::element::f32}}, @@ -134,8 +135,9 @@ void PriorBox::initSupportedPrimitiveDescriptors() { void PriorBox::createPrimitive() { if (inputShapesDefined()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -158,12 +160,14 @@ void PriorBox::execute(const dnnl::stream& strm) { auto min_size_ = min_size; if (!scale_all_sizes) { // mxnet-like PriorBox - if (step_ == -1) + if (step_ == -1) { step_ = 1.f * IH / H; - else + } else { step_ *= IH; - for (auto& size : min_size_) + } + for (auto& size : min_size_) { size *= IH; + } } int64_t idx = 0; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp index 26cd97f2334a7f..f068fff39c5f2f 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp @@ -77,8 +77,9 @@ bool 
PriorBoxClustered::needPrepareParams() const { } void PriorBoxClustered::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}}, {{LayoutType::ncsp, ov::element::f32}}, @@ -87,8 +88,9 @@ void PriorBoxClustered::initSupportedPrimitiveDescriptors() { void PriorBoxClustered::createPrimitive() { if (inputShapesDefined()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -143,11 +145,13 @@ void PriorBoxClustered::execute(const dnnl::stream& strm) { // 1. A single variance value (to be repeated 4 times for each prior) // 2. 4 variance values if (var_size == 1) { - for (size_t j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { dst_data[idx + j + out_shape[1]] = variances[0]; + } } else { - for (size_t j = 0; j < var_size; j++) + for (size_t j = 0; j < var_size; j++) { dst_data[idx + j + out_shape[1]] = variances[j]; + } } } }); diff --git a/src/plugins/intel_cpu/src/nodes/proposal.cpp b/src/plugins/intel_cpu/src/nodes/proposal.cpp index 6fefd4ca3e24b9..e049e0e33bad1a 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.cpp +++ b/src/plugins/intel_cpu/src/nodes/proposal.cpp @@ -138,8 +138,9 @@ Proposal::Proposal(const std::shared_ptr& op, const GraphContext::CPtr } void Proposal::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } if (store_prob) { addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, @@ -167,8 +168,9 @@ void Proposal::execute(const dnnl::stream& strm) { const float* imgInfoData = getSrcDataAtPortAs(IMG_INFO_IN_IDX); float* outRoiData = reinterpret_cast(getDstDataAtPort(ROI_OUT_IDX)); float* outProbData = nullptr; - if (store_prob) + if (store_prob) { outProbData = 
reinterpret_cast(getDstDataAtPort(PROBABILITIES_OUT_IDX)); + } auto inProbDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const size_t imgInfoSize = getParentEdgeAt(2)->getMemory().getStaticDims()[0]; diff --git a/src/plugins/intel_cpu/src/nodes/proposal_imp.cpp b/src/plugins/intel_cpu/src/nodes/proposal_imp.cpp index 77671d292ec26a..0bd1e7d75b8dea 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal_imp.cpp +++ b/src/plugins/intel_cpu/src/nodes/proposal_imp.cpp @@ -165,12 +165,14 @@ static void nms_cpu(const int num_boxes, #endif for (int box = 0; box < num_boxes; ++box) { - if (is_dead[box]) + if (is_dead[box]) { continue; + } index_out[count++] = base_index + box; - if (count == max_num_out) + if (count == max_num_out) { break; + } int tail = box + 1; @@ -257,8 +259,9 @@ static void nms_cpu(const int num_boxes, res = area / (A_area + B_area - area); } - if (nms_thresh < res) + if (nms_thresh < res) { is_dead[tail] = 1; + } } } @@ -311,8 +314,9 @@ static void retrieve_rois_cpu(const int num_rois, rois[roi * 5 + 3] = x1; rois[roi * 5 + 4] = y1; - if (probs) + if (probs) { probs[roi] = src_probs[index]; + } }); if (num_rois < post_nms_topn_) { diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp index e171b0113ac4c5..4c6065c2c06002 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp @@ -70,16 +70,20 @@ PSROIPooling::PSROIPooling(const std::shared_ptr& op, const GraphConte const auto defPsroi = ov::as_type_ptr(op); noTrans = op->get_input_size() == 2; - if (op->get_input_shape(0).size() != 4) + if (op->get_input_shape(0).size() != 4) { THROW_CPU_NODE_ERR("has first input with incorrect rank: " + std::to_string(op->get_input_shape(0).size())); - if (op->get_input_shape(1).size() != 2) + } + if (op->get_input_shape(1).size() != 2) { THROW_CPU_NODE_ERR("has second input with incorrect rank: " + 
std::to_string(op->get_input_shape(1).size())); - if (!noTrans && op->get_input_shape(2).size() != 4) + } + if (!noTrans && op->get_input_shape(2).size() != 4) { THROW_CPU_NODE_ERR("has third input with incorrect rank: " + std::to_string(op->get_input_shape(2).size())); + } if (psroi) { - if (psroi->get_input_size() != 2) + if (psroi->get_input_size() != 2) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } mode = psroi->get_mode(); if (mode == "average") { @@ -98,8 +102,9 @@ PSROIPooling::PSROIPooling(const std::shared_ptr& op, const GraphConte pooledWidth = groupSize; } else if (defPsroi) { - if (defPsroi->get_input_size() != 2 && defPsroi->get_input_size() != 3) + if (defPsroi->get_input_size() != 2 && defPsroi->get_input_size() != 3) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } algorithm = Algorithm::PSROIPoolingBilinearDeformable; @@ -130,8 +135,9 @@ PSROIPooling::PSROIPooling(const std::shared_ptr& op, const GraphConte } void PSROIPooling::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_core)) { @@ -197,26 +203,28 @@ void PSROIPooling::unpackParams(const BlockedMemoryDesc& srcDesc, int& inBlockSize, int& outBlockSize, int& outBlockCount, - unsigned long& inputChannelsPadding, - unsigned long& outputChannelsPadding) { + uint64_t& inputChannelsPadding, + uint64_t& outputChannelsPadding) { const bool inpIsBlk = srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c); const bool outIsBlk = dstDesc.hasLayoutType(LayoutType::nCsp16c) || dstDesc.hasLayoutType(LayoutType::nCsp8c); size_t expectedInBlockDimsSize = (inpIsBlk ? 5 : 4); size_t expectedOutBlockDimsSize = (outIsBlk ? 
5 : 4); const auto& inBlkDims = srcDesc.getBlockDims(); const auto& outBlkDims = dstDesc.getBlockDims(); - if (inBlkDims.size() != expectedInBlockDimsSize) + if (inBlkDims.size() != expectedInBlockDimsSize) { THROW_CPU_NODE_ERR("has unexpected size of blocking dims in input (given ", inBlkDims.size(), ", expected ", expectedInBlockDimsSize, ")"); - if (outBlkDims.size() != expectedOutBlockDimsSize) + } + if (outBlkDims.size() != expectedOutBlockDimsSize) { THROW_CPU_NODE_ERR("has unexpected size of blocking dims in output (given ", outBlkDims.size(), ", expected ", expectedOutBlockDimsSize, ")"); + } inBlockSize = (inpIsBlk ? srcDesc.getBlockDims()[4] : 1); outBlockSize = (outIsBlk ? dstDesc.getBlockDims()[4] : 1); @@ -228,16 +236,20 @@ void PSROIPooling::unpackParams(const BlockedMemoryDesc& srcDesc, const auto& outOrder = dstDesc.getOrder(); const auto& inOrder = srcDesc.getOrder(); for (size_t i = 0; i < outOrder.size(); i++) { - if (outOrder[i] == 2) + if (outOrder[i] == 2) { hOutStrIndex = i; - if (outOrder[i] == 3) + } + if (outOrder[i] == 3) { wOutStrIndex = i; + } } for (size_t i = 0; i < inOrder.size(); i++) { - if (inOrder[i] == 2) + if (inOrder[i] == 2) { hInStrIndex = i; - if (inOrder[i] == 3) + } + if (inOrder[i] == 3) { wInStrIndex = i; + } } hInputStride = srcDesc.getStrides()[hInStrIndex]; wInputStride = srcDesc.getStrides()[wInStrIndex]; @@ -254,7 +266,7 @@ void PSROIPooling::executeAverage(const inputType* srcData, const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; - unsigned long inputChannelsPadding, outputChannelsPadding; + uint64_t inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, @@ -350,7 +362,7 @@ void PSROIPooling::executeBilinear(const inputType* srcData, const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, 
hInputStride, wInputStride, hOutputStride, wOutputStride; - unsigned long inputChannelsPadding, outputChannelsPadding; + uint64_t inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, @@ -411,10 +423,12 @@ void PSROIPooling::executeBilinear(const inputType* srcData, const int leftXIndex = static_cast(floorf(inX)); int rightXIndex = static_cast(ceilf(inX)); - if (rightXIndex > width - 1) + if (rightXIndex > width - 1) { rightXIndex = width - 1; - if (bottomYIndex > height - 1) + } + if (bottomYIndex > height - 1) { bottomYIndex = height - 1; + } auto topLeftIndex = topYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes; auto topRightIndex = topYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes; @@ -520,8 +534,9 @@ void PSROIPooling::executeBilinearDeformable(const inputType* srcData, float w1 = wStart + iw * subBinSizeW; float h1 = hStart + ih * subBinSizeH; // bilinear interpolation - if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5) + if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5) { continue; + } w1 = static_cast((std::min)((std::max)(static_cast(w1), 0.0), width - 1.0)); h1 = static_cast((std::min)((std::max)(static_cast(h1), 0.0), height - 1.0)); int c1 = static_cast((c * groupSize + gh) * groupSize + gw); diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.h b/src/plugins/intel_cpu/src/nodes/psroi_pooling.h index 08912df92087a0..1b9d0752468960 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.h @@ -55,8 +55,8 @@ class PSROIPooling : public Node { int& inBlockSize, int& outBlockSize, int& outBlockCount, - unsigned long& inputChannelsPadding, - unsigned long& outputChannelsPadding); + uint64_t& inputChannelsPadding, + uint64_t& outputChannelsPadding); template void executeAverage(const inputType* srcData, diff --git a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp 
b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp index d6f50e2df78244..f08f1a77d304d6 100644 --- a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp +++ b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp @@ -152,23 +152,25 @@ struct QKVProjection::Executor : public QKVProjection::ExecutorBase { ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { - if (quantized_int8) + if (quantized_int8) { work.setup(wbuffer.get(ithr), reinterpret_cast(work.p_raw_weights), stride_in_bytes, true); - else + } else { work.setup(wbuffer.get(ithr), reinterpret_cast(work.p_raw_weights), stride_in_bytes); + } } }); } void setM(int M) { uint8_t* cur_scratch_base = nullptr; - if (m_scratchMem) + if (m_scratchMem) { cur_scratch_base = m_scratchMem->getDataAs(); + } // new M larger than previous or the scratch pointer is changed after the following allocation if (m_M < M || cur_scratch_base != m_scratch_base) { ScratchBuffAllocator allocator; @@ -341,8 +343,9 @@ QKVProjection::QKVProjection(const std::shared_ptr& op, const GraphCon const auto& config = context->getConfig(); size_t concurrency = config.streamExecutorConfig.get_threads_per_stream(); - if (concurrency == 0) + if (concurrency == 0) { concurrency = parallel_get_max_threads(); + } if (!isSupportedOperation(op, errorMessage, concurrency, config.fcDynamicQuantizationGroupSize)) { OPENVINO_THROW("CPU: " + errorMessage); @@ -352,8 +355,9 @@ QKVProjection::QKVProjection(const std::shared_ptr& op, const GraphCon } void QKVProjection::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inPortConfigs; std::vector outPortConfigs; diff --git a/src/plugins/intel_cpu/src/nodes/random_uniform.cpp b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp index eeb36442a71bc7..882feb378cc01c 100644 --- a/src/plugins/intel_cpu/src/nodes/random_uniform.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp @@ -192,8 +192,9 @@ std::string RandomUniform::getPrimitiveDescriptorType() const { std::string str_type; auto add_type = [&](const std::string& t) { - if (!str_type.empty() && t.c_str()[0] != '_') + if (!str_type.empty() && t.c_str()[0] != '_') { str_type += "_"; + } str_type += t; }; @@ -212,15 +213,16 @@ std::string RandomUniform::getPrimitiveDescriptorType() const { #undef SEARCH_TYPE - if (type == impl_desc_type::unknown) + if (type == impl_desc_type::unknown) { str_type = "unknown"; - else if (str_type.empty()) + } else if (str_type.empty()) { str_type = "undef"; + } if (selectedPrimitiveDesc) { if (selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision() != ov::element::u8) { str_type += - "_" + std::string( + "_" + static_cast( selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision().get_type_name()); } else { str_type += "_I8"; @@ -651,8 +653,8 @@ inline void convertToOutputTypeMersenne(const uint32_t in1, float* out, int64_t elements_remaining, bool optimization_enabled) { - const auto mask = static_cast((uint64_t(1) << std::numeric_limits::digits) - 1); - const auto divisor = static_cast(1) / (uint64_t(1) << std::numeric_limits::digits); + const auto mask = static_cast((static_cast(1) << std::numeric_limits::digits) - 1); + const auto divisor = static_cast(1) / (static_cast(1) << std::numeric_limits::digits); out[0] = static_cast((in1 & mask) * divisor) * range + min; if (elements_remaining >= 2l) { @@ -667,8 +669,8 @@ inline void convertToOutputTypeMersenne(const uint32_t in1, float16* out, int64_t elements_remaining, bool optimization_enabled) { - const auto mask = static_cast((uint64_t(1) << std::numeric_limits::digits) - 1); - const auto divisor = static_cast(1) / (uint64_t(1) << std::numeric_limits::digits); + const auto mask = static_cast((static_cast(1) << std::numeric_limits::digits) - 1); + const auto divisor = static_cast(1) / (static_cast(1) << 
std::numeric_limits::digits); out[0] = static_cast((in1 & mask) * divisor) * range + min; if (elements_remaining >= 2l) { diff --git a/src/plugins/intel_cpu/src/nodes/range.cpp b/src/plugins/intel_cpu/src/nodes/range.cpp index 1f0a02e5594d55..d09a9d810d77ca 100644 --- a/src/plugins/intel_cpu/src/nodes/range.cpp +++ b/src/plugins/intel_cpu/src/nodes/range.cpp @@ -36,29 +36,35 @@ Range::Range(const std::shared_ptr& op, const GraphContext::CPtr& cont OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (getOriginalInputsNumber() != 3 || getOriginalOutputsNumber() != 1) + if (getOriginalInputsNumber() != 3 || getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } auto start_dims = op->get_input_shape(RANGE_START); - if (ov::shape_size(start_dims) != 1) + if (ov::shape_size(start_dims) != 1) { THROW_CPU_NODE_ERR("has start scalar with more than 1 value"); + } auto limit_dims = op->get_input_shape(RANGE_LIMIT); - if (ov::shape_size(limit_dims) != 1) + if (ov::shape_size(limit_dims) != 1) { THROW_CPU_NODE_ERR("has limit scalar with more than 1 value"); + } auto delta_dims = op->get_input_shape(RANGE_DELTA); - if (ov::shape_size(delta_dims) != 1) + if (ov::shape_size(delta_dims) != 1) { THROW_CPU_NODE_ERR("has delta scalar with more than 1 value"); + } size_t dstRank = op->get_output_partial_shape(0).size(); - if (dstRank > 1) + if (dstRank > 1) { THROW_CPU_NODE_ERR("has unsupported rank for output: ", dstRank); + } } void Range::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inDataConf; std::vector outDataConf; @@ -72,15 +78,17 @@ void Range::initSupportedPrimitiveDescriptors() { getOriginalInputPrecisionAtPort(RANGE_DELTA) == ov::element::f32 && getOriginalOutputPrecisionAtPort(0) == ov::element::f32)) { inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; 
i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); + } outDataConf.reserve(1); outDataConf.emplace_back(LayoutType::ncsp, ov::element::f32); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } else { inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) + for (size_t i = 0; i < inputShapes.size(); ++i) { inDataConf.emplace_back(LayoutType::ncsp); + } outDataConf.reserve(1); outDataConf.emplace_back(LayoutType::ncsp); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); @@ -112,12 +120,15 @@ void Range::execute(const dnnl::stream& strm) { template size_t Range::getWorkAmount(data_t* startPtr, data_t* stopPtr, data_t* stepPtr) const { data_t start = 0, limit = 0, delta = 0; - if (startPtr == nullptr) + if (startPtr == nullptr) { startPtr = &start; - if (stopPtr == nullptr) + } + if (stopPtr == nullptr) { stopPtr = &limit; - if (stepPtr == nullptr) + } + if (stepPtr == nullptr) { stepPtr = &delta; + } *startPtr = getSrcDataAtPortAs(RANGE_START)[0]; *stopPtr = getSrcDataAtPortAs(RANGE_LIMIT)[0]; *stepPtr = getSrcDataAtPortAs(RANGE_DELTA)[0]; diff --git a/src/plugins/intel_cpu/src/nodes/rdft.cpp b/src/plugins/intel_cpu/src/nodes/rdft.cpp index 4639bbd8a8c814..c67648d47a7747 100644 --- a/src/plugins/intel_cpu/src/nodes/rdft.cpp +++ b/src/plugins/intel_cpu/src/nodes/rdft.cpp @@ -108,8 +108,9 @@ RDFT::RDFT(const std::shared_ptr& op, const GraphContext::CPtr& contex OPENVINO_THROW(errorMsgPrefix, " has invalid 'signalSize' input tensor with rank: ", signalSizeRank); } auto signalSizesNode = ov::as_type(op->get_input_node_ptr(2)); - if (!signalSizesNode) + if (!signalSizesNode) { return; + } isSignalSizesConstant = true; signalSizes = signalSizesNode->cast_vector(); } else if (isAxesConstant) { @@ -121,8 +122,9 @@ RDFT::RDFT(const std::shared_ptr& op, const GraphContext::CPtr& contex void RDFT::getSupportedDescriptors() {} void
RDFT::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); if (!dataPrecision.is_real()) { @@ -145,8 +147,9 @@ void RDFT::initSupportedPrimitiveDescriptors() { std::vector configurators( {{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, ov::element::i32}}); - if (inputShapes.size() > SIGNAL_SIZE_INDEX) + if (inputShapes.size() > SIGNAL_SIZE_INDEX) { configurators.push_back({LayoutType::ncsp, ov::element::i32}); + } addSupportedPrimDesc(configurators, {{LayoutType::ncsp, ov::element::f32}}, impl_desc_type::ref_any); } @@ -334,7 +337,7 @@ void RDFTExecutor::execute(float* inputPtr, canUseFFT(signalSizes[0]), false); } else { - if (!isInverse) + if (!isInverse) { rdftNd(inputPtr, outputPtr, twiddles, @@ -344,7 +347,7 @@ void RDFTExecutor::execute(float* inputPtr, inputStrides, outputShape, outputStrides); - else + } else { irdftNd(inputPtr, outputPtr, twiddles, @@ -354,6 +357,7 @@ void RDFTExecutor::execute(float* inputPtr, inputStrides, outputShape, outputStrides); + } } } @@ -507,10 +511,11 @@ void RDFTExecutor::fft(float* input, float* outputPtr = &scratchSpace[2 * signalSize]; if (inputSize < signalSize || type == real_to_complex) { - if (isInverse) + if (isInverse) { fftCopyInverseInputData(&scratchSpace[0], input, inputSize, signalSize, parallelize); - else if (type == real_to_complex) + } else if (type == real_to_complex) { fftCopyRealInputData(&scratchSpace[0], input, inputSize, parallelize); + } inputPtr = &scratchSpace[0]; } @@ -522,8 +527,9 @@ void RDFTExecutor::fft(float* input, size_t outputOffset = block * blockSize / 2; float cos = twiddlesPtr[2 * block]; float sin = twiddlesPtr[2 * block + 1]; - if (isInverse) + if (isInverse) { sin = -sin; + } for (size_t pair = 0; pair < blockSize / 2; pair++) { float evenReal = inputPtr[2 * (inputOffset + pair)]; float evenImag = inputPtr[2 * 
(inputOffset + pair) + 1]; @@ -555,8 +561,9 @@ void RDFTExecutor::fft(float* input, } } twiddlesPtr += numBlocks * 2; - if (numBlocks == 1 && inputPtr == input) + if (numBlocks == 1 && inputPtr == input) { inputPtr = &scratchSpace[0]; + } std::swap(inputPtr, outputPtr); } @@ -824,8 +831,9 @@ std::vector> RDFTExecutor::generateTwiddles(const std::vector size_t N = signalSizes[i]; size_t K = outputShape[axis]; auto type = complex_to_complex; - if (i == axes.size() - 1) + if (i == axes.size() - 1) { type = isInverse ? complex_to_real : real_to_complex; + } twiddles.push_back(generateTwiddlesCommon(N, K, type, canUseFFT(N))); } return twiddles; @@ -838,28 +846,33 @@ struct RDFTJitExecutor : public RDFTExecutor { rdftKernel.reset(new jit_dft_kernel_f32(isInverse, rdftType)); dftKernel.reset(new jit_dft_kernel_f32(isInverse, complex_to_complex)); vlen = cpu_isa_traits::vlen; - if (primDesc) + if (primDesc) { primDesc->setImplementationType(jit_avx512); + } } else if (mayiuse(cpu::x64::avx2)) { rdftKernel.reset(new jit_dft_kernel_f32(isInverse, rdftType)); dftKernel.reset(new jit_dft_kernel_f32(isInverse, complex_to_complex)); vlen = cpu_isa_traits::vlen; - if (primDesc) + if (primDesc) { primDesc->setImplementationType(jit_avx2); + } } else if (mayiuse(cpu::x64::sse41)) { rdftKernel.reset(new jit_dft_kernel_f32(isInverse, rdftType)); dftKernel.reset(new jit_dft_kernel_f32(isInverse, complex_to_complex)); vlen = cpu_isa_traits::vlen; - if (primDesc) + if (primDesc) { primDesc->setImplementationType(jit_sse42); + } } else { OPENVINO_THROW("Can't create RDFT kernel"); } - if (rdftKernel) + if (rdftKernel) { rdftKernel->create_ker(); - if (dftKernel) + } + if (dftKernel) { dftKernel->create_ker(); + } } std::vector generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override { @@ -946,8 +959,9 @@ struct RDFTRefExecutor : public RDFTExecutor { std::vector twiddles(inputSize * outputSize * 2); parallel_for2d(outputSize, inputSize, [&](size_t k, size_t 
n) { double angle = 2 * PI * k * n / inputSize; - if (!isInverse) + if (!isInverse) { angle = -angle; + } twiddles[(k * inputSize + n) * 2] = std::cos(angle); twiddles[(k * inputSize + n) * 2 + 1] = std::sin(angle); }); diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index 04dfbc2c35e30a..e50f8f413915a5 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -150,8 +150,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene data_type::f32); } - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16 = std::make_shared(this, isa); + } this->preamble(); @@ -162,8 +163,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); mov(reg_work_batch, ptr[reg_params + GET_OFF(work_batch)]); - if (planar_layout) + if (planar_layout) { mov(reg_reduce_w, ptr[reg_params + GET_OFF(reduce_w)]); + } if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax || jcp_.reduce_mode == Algorithm::ReduceMin || @@ -172,8 +174,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene } if (isa == cpu::x64::avx512_core || jcp_.reduce_mode == Algorithm::ReduceAnd || - jcp_.reduce_mode == Algorithm::ReduceOr) + jcp_.reduce_mode == Algorithm::ReduceOr) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + } if ((isa == cpu::x64::avx512_core && jcp_.reduce_mode == Algorithm::ReduceAnd) || jcp_.reduce_mode == Algorithm::ReduceOr) { @@ -185,8 +188,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene this->postamble(); - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16->emit_data(); + } if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == 
Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax || jcp_.reduce_mode == Algorithm::ReduceMin || @@ -369,10 +373,11 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene uni_vmovups(vmm_dst, table_val(0)); break; case Algorithm::ReduceProd: - if (isFloatCompatible(jcp_.src_dt)) + if (isFloatCompatible(jcp_.src_dt)) { uni_vmovups(vmm_dst, table_val(0)); - else + } else { uni_vmovups(vmm_dst, table_val(6)); + } break; case Algorithm::ReduceL1: uni_vmovups(vmm_aux, table_val(1)); @@ -388,16 +393,18 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene uni_vpxor(vmm_dst, vmm_dst, vmm_dst); break; case Algorithm::ReduceMax: - if (isFloatCompatible(jcp_.dst_dt)) + if (isFloatCompatible(jcp_.dst_dt)) { uni_vmovups(vmm_dst, table_val(2)); - else + } else { uni_vmovups(vmm_dst, table_val(4)); + } break; case Algorithm::ReduceMin: - if (isFloatCompatible(jcp_.dst_dt)) + if (isFloatCompatible(jcp_.dst_dt)) { uni_vmovups(vmm_dst, table_val(3)); - else + } else { uni_vmovups(vmm_dst, table_val(5)); + } break; default: assert(!"unsupported reduce mode"); @@ -691,8 +698,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene assert(!"unknown src_dt"); } - if (convert_i32_to_f32(src_dt)) + if (convert_i32_to_f32(src_dt)) { uni_vcvtdq2ps(vmm_val, vmm_val); + } add(rsp, vlen); } @@ -914,8 +922,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene inline void load_dst_vector() { load_vector(vmm_dst, ptr[reg_dst], jcp_.dst_dt); - if (isa == cpu::x64::sse41) + if (isa == cpu::x64::sse41) { load_vector(vmm_dst_aux, ptr[reg_dst + 4 * jcp_.dst_data_size], jcp_.dst_dt); + } } inline void store_dst_vector() { @@ -929,8 +938,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene } } store_vector(ptr[reg_dst], vmm_dst, jcp_.dst_dt); - if (isa == cpu::x64::sse41) + if (isa == cpu::x64::sse41) { store_vector(ptr[reg_dst + 4 * 
jcp_.dst_data_size], vmm_dst_aux, jcp_.dst_dt); + } } inline void load_vector(Vmm vmm_src, const Xbyak::Address& op, memory::data_type src_dt) { @@ -956,8 +966,9 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene assert(!"unknown src_dt"); } - if (convert_i32_to_f32(src_dt)) + if (convert_i32_to_f32(src_dt)) { uni_vcvtdq2ps(vmm_src, vmm_src); + } } inline void load_scalar(Xmm xmm_src, const Xbyak::Address& op, memory::data_type src_dt) { @@ -1018,13 +1029,15 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene vpmovsdb(op, vmm_dst); } else { uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vmovq(op, xmm_dst); - else + } else { uni_vmovd(op, xmm_dst); + } } break; case memory::data_type::u8: @@ -1033,13 +1046,15 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene vpmovusdb(op, vmm_dst); } else { uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vmovq(op, xmm_dst); - else + } else { uni_vmovd(op, xmm_dst); + } } break; default: @@ -1144,10 +1159,11 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene uni_vorps(xmm, xmm, op); break; case Algorithm::ReduceProd: - if (isFloatCompatible(jcp_.src_dt)) + if (isFloatCompatible(jcp_.src_dt)) { uni_vmulps(xmm, xmm, op); - else + } else { uni_vpmulld(xmm, xmm, op); + } break; default: assert(!"unsupported reduce mode"); @@ -1237,8 +1253,9 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi data_type::f32); } - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16 = 
std::make_shared(this, isa); + } this->preamble(); @@ -1252,17 +1269,20 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi mov(reg_work_amount, ptr[reg_params + GET_OFF_POST(work_amount)]); mov(reg_channel_size, ptr[reg_params + GET_OFF_POST(channel_size)]); mov(reg_divisor, ptr[reg_params + GET_OFF_POST(divisor)]); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { mov(reg_src, ptr[reg_params + GET_OFF_POST(src)]); - if (!planar_layout) + } + if (!planar_layout) { mov(reg_reduce_c, ptr[reg_params + GET_OFF_POST(reduce_c)]); + } if (post_ops_fusing) { mov(reg_post_ops_data, ptr[reg_params + GET_OFF_POST(post_op_data)]); mov(reg_oc_off, ptr[reg_params + GET_OFF_POST(oc_off)]); } - if (isa == cpu::x64::avx512_core) + if (isa == cpu::x64::avx512_core) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + } if (jcp_.layout == ReduceLayoutType::reduce_blocked) { reduce_post_main(); @@ -1292,15 +1312,17 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi this->postamble(); - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16->emit_data(); + } if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { log_injector->prepare_table(); } - for (auto& inj : eltwise_injectors) + for (auto& inj : eltwise_injectors) { inj->prepare_table(); + } } private: @@ -1377,17 +1399,20 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi // load wrap_load_vector(vmm_dst, 0); - if (isa == cpu::x64::sse41) + if (isa == cpu::x64::sse41) { wrap_load_vector(vmm_dst_aux, 4); + } // reduce and store horiz_reduce_store(vmm_dst, jcp_.dst_dt); - if (isa == cpu::x64::sse41) + if (isa == cpu::x64::sse41) { horiz_reduce_store(vmm_dst_aux, jcp_.dst_dt, true); + } add(reg_dst, step * jcp_.dst_data_size); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { add(reg_src, step * sizeof(float)); + } sub(reg_work_amount, step); 
jmp(reduce_loop_label, T_NEAR); @@ -1396,8 +1421,9 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi if (post_reduce || post_ops_fusing) { mov(reg_dst, ptr[reg_params + GET_OFF_POST(dst)]); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { mov(reg_src, ptr[reg_params + GET_OFF_POST(src)]); + } mov(reg_work_amount, ptr[reg_params + GET_OFF_POST(work_amount)]); } } @@ -1407,8 +1433,9 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi L(reduce_map_label); { if (post_reduce) { - if (jcp_.reduce_mode == Algorithm::ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceMean) { uni_vbroadcastss(vmm_aux, ptr[reg_divisor]); + } Xbyak::Label reduce_loop_label; Xbyak::Label reduce_loop_end_label; @@ -1421,28 +1448,33 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi wrap_load_vector(vmm_dst, 0); reduce_map_kernel(vmm_dst); - if (post_ops_fusing) + if (post_ops_fusing) { apply_post_ops(jcp_.dst_dt, jcp_.fuse_broadcast); + } store_vector(ptr[reg_dst], vmm_dst, jcp_.dst_dt); if (isa == cpu::x64::sse41) { wrap_load_vector(vmm_dst, 4); reduce_map_kernel(vmm_dst); if (post_ops_fusing) { - if (jcp_.layout != ReduceLayoutType::reduce_ncsp) + if (jcp_.layout != ReduceLayoutType::reduce_ncsp) { add(reg_oc_off, 4 * sizeof(float)); + } apply_post_ops(jcp_.dst_dt, jcp_.fuse_broadcast); - if (jcp_.layout != ReduceLayoutType::reduce_ncsp) + if (jcp_.layout != ReduceLayoutType::reduce_ncsp) { sub(reg_oc_off, 4 * sizeof(float)); + } } store_vector(ptr[reg_dst + 4 * jcp_.dst_data_size], vmm_dst, jcp_.dst_dt); } add(reg_dst, step * jcp_.dst_data_size); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { add(reg_src, step * sizeof(float)); - if (post_ops_fusing && increase_oc_off) + } + if (post_ops_fusing && increase_oc_off) { add(reg_oc_off, step * sizeof(float)); + } sub(reg_work_amount, step); jmp(reduce_loop_label, T_NEAR); @@ -1465,19 +1497,23 @@ struct 
jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi if (isa == cpu::x64::sse41) { wrap_load_vector(vmm_dst, 4); - if (jcp_.layout != ReduceLayoutType::reduce_ncsp) + if (jcp_.layout != ReduceLayoutType::reduce_ncsp) { add(reg_oc_off, 4 * sizeof(float)); + } apply_post_ops(jcp_.dst_dt, jcp_.fuse_broadcast); - if (jcp_.layout != ReduceLayoutType::reduce_ncsp) + if (jcp_.layout != ReduceLayoutType::reduce_ncsp) { sub(reg_oc_off, 4 * sizeof(float)); + } store_vector(ptr[reg_dst + 4 * jcp_.dst_data_size], vmm_dst, jcp_.dst_dt); } add(reg_dst, step * jcp_.dst_data_size); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { add(reg_src, step * sizeof(float)); - if (post_ops_fusing && increase_oc_off) + } + if (post_ops_fusing && increase_oc_off) { add(reg_oc_off, step * sizeof(float)); + } sub(reg_work_amount, step); jmp(reduce_loop_label, T_NEAR); @@ -1492,8 +1528,9 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi // reduce map for tail in dst memory // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean] in planar layout if (post_reduce) { - if (jcp_.reduce_mode == Algorithm::ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceMean) { uni_vbroadcastss(xmm_aux, ptr[reg_divisor]); + } Xbyak::Label reduce_loop_label; Xbyak::Label reduce_loop_end_label; @@ -1511,15 +1548,18 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi reduce_map_kernel_scalar(xmm_dst); // store - if (post_ops_fusing) + if (post_ops_fusing) { apply_post_ops(jcp_.dst_dt, jcp_.fuse_broadcast); + } store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt); add(reg_dst, step * jcp_.dst_data_size); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { add(reg_src, step * sizeof(float)); - if (post_ops_fusing && increase_oc_off) + } + if (post_ops_fusing && increase_oc_off) { add(reg_oc_off, step * sizeof(float)); + } sub(reg_work_amount, step); jmp(reduce_loop_label, T_NEAR); @@ -1544,10 
+1584,12 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi store_scalar(ptr[reg_dst], xmm_dst, jcp_.dst_dt); add(reg_dst, step * jcp_.dst_data_size); - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { add(reg_src, step * sizeof(float)); - if (post_ops_fusing && increase_oc_off) + } + if (post_ops_fusing && increase_oc_off) { add(reg_oc_off, step * sizeof(float)); + } sub(reg_work_amount, step); jmp(reduce_loop_label, T_NEAR); @@ -1618,35 +1660,39 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi } inline void reduce_map_kernel(Vmm vmm_dst) { - if (jcp_.reduce_mode == Algorithm::ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceMean) { uni_vdivps(vmm_dst, vmm_dst, vmm_aux); - else if (jcp_.reduce_mode == Algorithm::ReduceL2) + } else if (jcp_.reduce_mode == Algorithm::ReduceL2) { uni_vsqrtps(vmm_dst, vmm_dst); - else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) + } else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { log_injector->compute_vector_range(vmm_dst.getIdx(), vmm_dst.getIdx() + 1); + } } inline void reduce_map_kernel_scalar(Xmm xmm_dst) { - if (jcp_.reduce_mode == Algorithm::ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceMean) { uni_vdivps(xmm_dst, xmm_dst, xmm_aux); - else if (jcp_.reduce_mode == Algorithm::ReduceL2) + } else if (jcp_.reduce_mode == Algorithm::ReduceL2) { uni_vsqrtps(xmm_dst, xmm_dst); - else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) + } else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { log_injector->compute_vector_range(xmm_dst.getIdx(), xmm_dst.getIdx() + 1); + } } inline void wrap_load_vector(Vmm vmm_val, size_t offset) { - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { load_vector(vmm_val, ptr[reg_src + offset * 
sizeof(float)], memory::data_type::f32); - else + } else { load_vector(vmm_val, ptr[reg_dst + offset * jcp_.dst_data_size], jcp_.dst_dt); + } } inline void wrap_load_scalar(Xmm xmm_val, size_t offset) { - if (jcp_.fuse_low_precision) + if (jcp_.fuse_low_precision) { load_scalar(xmm_val, ptr[reg_src + offset * sizeof(float)], memory::data_type::f32); - else + } else { load_scalar(xmm_val, ptr[reg_dst + offset * jcp_.dst_data_size], jcp_.dst_dt); + } } inline void load_vector(Vmm vmm_src, const Xbyak::Address& op, memory::data_type src_dt) { @@ -1672,8 +1718,9 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi assert(!"unknown src_dt"); } - if (!isFloatCompatible(src_dt)) + if (!isFloatCompatible(src_dt)) { uni_vcvtdq2ps(vmm_src, vmm_src); + } } inline void load_scalar(Xmm xmm_src, const Xbyak::Address& op, memory::data_type src_dt) { @@ -1735,13 +1782,15 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi vpmovsdb(op, vmm_dst); } else { uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vmovq(op, xmm_dst); - else + } else { uni_vmovd(op, xmm_dst); + } } break; case memory::data_type::u8: @@ -1750,13 +1799,15 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi vpmovusdb(op, vmm_dst); } else { uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vpermq(ymm_dst, ymm_dst, 0x08); + } uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) + if (isa != cpu::x64::sse41) { vmovq(op, xmm_dst); - else + } else { uni_vmovd(op, xmm_dst); + } } break; default: @@ -1968,15 +2019,17 @@ Reduce::Reduce(const std::shared_ptr& op, const GraphContext::CPtr& co keep_dims = reduce->get_keep_dims(); auto reduceConst = 
ov::as_type_ptr(reduce->get_input_node_shared_ptr(REDUCE_INDEXES)); - if (!reduceConst) + if (!reduceConst) { THROW_CPU_NODE_ERR("second tensor is not constant!"); + } raw_axes = reduceConst->cast_vector(); } else if (const auto reduce = ov::as_type_ptr(op)) { keep_dims = reduce->get_keep_dims(); auto reduceConst = ov::as_type_ptr(reduce->get_input_node_shared_ptr(REDUCE_INDEXES)); - if (!reduceConst) + if (!reduceConst) { THROW_CPU_NODE_ERR("second tensor is not constant!"); + } raw_axes = reduceConst->cast_vector(); } set_use_aux_kernel = false; @@ -1991,31 +2044,36 @@ Reduce::Reduce(const std::shared_ptr& op, const GraphContext::CPtr& co } void Reduce::getSupportedDescriptors() { - if (getParentEdges().size() != 2) + if (getParentEdges().size() != 2) { THROW_CPU_NODE_ERR("gets incorrect number of input edges!"); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("gets incorrect number of output edges!"); + } if (getInputShapeAtPort(REDUCE_INDEXES).getRank() != 1) { THROW_CPU_NODE_ERR("gets incorrect index vector dimension! Index vector should be 1 dimension."); } if (keep_dims) { - if (getInputShapeAtPort(REDUCE_DATA).getRank() != getOutputShapeAtPort(0).getRank()) + if (getInputShapeAtPort(REDUCE_DATA).getRank() != getOutputShapeAtPort(0).getRank()) { THROW_CPU_NODE_ERR("gets incorrect number of input/output dimensions!"); + } } else { // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d. // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases. 
bool is_emulated_0d_as_1d = getInputShapeAtPort(REDUCE_DATA).getRank() == 1 && getOutputShapeAtPort(0).getRank() == 1; - if (getInputShapeAtPort(REDUCE_DATA).getRank() <= getOutputShapeAtPort(0).getRank() && !is_emulated_0d_as_1d) + if (getInputShapeAtPort(REDUCE_DATA).getRank() <= getOutputShapeAtPort(0).getRank() && !is_emulated_0d_as_1d) { THROW_CPU_NODE_ERR("gets incorrect number of input/output dimensions!"); + } } } void Reduce::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } input_prec = getOriginalInputPrecisionAtPort(REDUCE_DATA); output_prec = getOriginalOutputPrecisionAtPort(0); @@ -2037,11 +2095,13 @@ void Reduce::initSupportedPrimitiveDescriptors() { // use BF16/FP16 output precision due to the possible accuracy loss. Therefore, for such mods, we will change // the output precision to FP32. if (ov::element::bf16 == output_prec) { - if (!mayiuse(avx512_core) || is_precision_sensitive_reduce(algorithm)) + if (!mayiuse(avx512_core) || is_precision_sensitive_reduce(algorithm)) { output_prec = ov::element::f32; + } } else if (ov::element::f16 == output_prec) { - if (!mayiuse(cpu::x64::avx2) || is_precision_sensitive_reduce(algorithm)) + if (!mayiuse(cpu::x64::avx2) || is_precision_sensitive_reduce(algorithm)) { output_prec = ov::element::f32; + } } if (!fusedWith.empty()) { @@ -2234,8 +2294,9 @@ void Reduce::prepareParams() { post_kernel.reset(new jit_uni_reduce_post_kernel_f32(key.jcp, *attr.get())); } #endif // OPENVINO_ARCH_X86_64 - if (post_kernel) + if (post_kernel) { post_kernel->create_ker(); + } return post_kernel; }; @@ -2269,12 +2330,15 @@ void Reduce::createPrimitive() { } auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(REDUCE_DATA); - if (!dstMemPtr) + if (!dstMemPtr) { THROW_CPU_NODE_ERR("has null destination memory."); - if (!srcMemPtr) + } + if (!srcMemPtr) { THROW_CPU_NODE_ERR("has null input memory."); - if 
(getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has nullable preferable primitive descriptor"); + } if (srcMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { layout = ReduceLayoutType::reduce_ncsp; @@ -2316,8 +2380,9 @@ void Reduce::createPrimitive() { } if (inputShapesDefined()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } @@ -2357,8 +2422,9 @@ void Reduce::create_reduce_kernel(std::shared_ptr& kernel kernel.reset(new jit_uni_reduce_kernel_f32(jcp)); } #endif // OPENVINO_ARCH_X86_64 - if (kernel) + if (kernel) { kernel->create_ker(); + } jit_mode = jit_mode && kernel; } @@ -2966,8 +3032,9 @@ inline void Reduce::reduce_kernel_post_process(uint8_t* out_ptr) { } else if (layout == ReduceLayoutType::reduce_nspc) { const size_t num_threads = static_cast(parallel_get_max_threads()); size_t OP = OB * OC >= num_threads ? OB * OC : OB * OC * OD; - if (OP < num_threads && OW > blk_size) + if (OP < num_threads && OW > blk_size) { OP *= OH; + } size_t work_amount = OB * OC * OD * OH * OW / OP; auto op_loop = [&](size_t op) { const uint8_t* in_p = in_ptr + op * work_amount * intermediate_data_size; @@ -3341,10 +3408,12 @@ inline void Reduce::calc_process_dst_dims(std::vector& reduce_axes, const V process_dst_dims.clear(); axes_for_reduction.clear(); for (auto& axis : reduce_axes) { - if (axis < 0) + if (axis < 0) { axis += src_dims.size(); - if (static_cast(axis) > src_dims.size()) + } + if (static_cast(axis) > src_dims.size()) { THROW_CPU_NODE_ERR("exceeds data tensor dimension on index to reduce"); + } axes.insert(static_cast(axis)); } for (size_t i = 0; i < src_dims.size(); i++) { @@ -3356,8 +3425,9 @@ inline void Reduce::calc_process_dst_dims(std::vector& reduce_axes, const V } } if (found) { - if (keep_dims) + if (keep_dims) { out_dims.push_back(1); + } process_dst_dims.push_back(1); axes_for_reduction.push_back(i); } else { @@ -3366,13 
+3436,15 @@ inline void Reduce::calc_process_dst_dims(std::vector& reduce_axes, const V } } if (jit_mode && jit_beyond_5D) { - if (std::accumulate(out_dims.begin(), out_dims.end(), size_t(1), std::multiplies()) != - std::accumulate(dst_dims.begin(), dst_dims.end(), size_t(1), std::multiplies())) + if (std::accumulate(out_dims.begin(), out_dims.end(), static_cast(1), std::multiplies()) != + std::accumulate(dst_dims.begin(), dst_dims.end(), static_cast(1), std::multiplies())) { THROW_CPU_NODE_ERR("gets incorrect number of output dimensions!"); + } } else { for (size_t i = 0; i < std::min(out_dims.size(), dst_dims.size()); i++) { - if (out_dims[i] != dst_dims[i]) + if (out_dims[i] != dst_dims[i]) { THROW_CPU_NODE_ERR("gets incorrect number of output dimensions!"); + } } } } @@ -3526,10 +3598,12 @@ void Reduce::reduce_ref_process(const float* in_ptr, float init_value, std::function func) { size_t work_amount_dst = 1, reduced_dims_work_amount = 1; - for (size_t i = 0; i < process_dst_dims.size(); i++) + for (size_t i = 0; i < process_dst_dims.size(); i++) { work_amount_dst *= process_dst_dims[i]; - for (size_t i = 0; i < src_dims.size(); i++) + } + for (size_t i = 0; i < src_dims.size(); i++) { reduced_dims_work_amount *= src_dims[i]; + } reduced_dims_work_amount /= work_amount_dst; VectorDims src_strides = @@ -3550,8 +3624,9 @@ void Reduce::reduce_ref_process(const float* in_ptr, for (i = 0; i < reduced_dims_work_amount; ++i) { if (update_idx) { src_idx = 0; - for (j = 0; j < static_cast(src_dims.size()); ++j) + for (j = 0; j < static_cast(src_dims.size()); ++j) { src_idx += (src_counters[j] % src_dims[j]) * src_strides[j]; + } update_idx = false; } reduce_prod = func(reduce_prod, in_ptr[src_idx]); @@ -3569,10 +3644,11 @@ void Reduce::reduce_ref_process(const float* in_ptr, out_ptr[dst_idx] = reduce_prod; for (j = process_dst_dims.size() - 1; j >= 0; j--) { dst_counters[j]++; - if (dst_counters[j] < process_dst_dims[j]) + if (dst_counters[j] < process_dst_dims[j]) { 
break; - else + } else { dst_counters[j] = 0; + } } } }); @@ -3641,8 +3717,9 @@ void Reduce::setJITBeyond5D() { jit_beyond_5D = false; if (getInputShapeAtPort(REDUCE_DATA).getRank() > 5) { for (auto& axis : raw_axes) { - if (axis < 0) + if (axis < 0) { axis += static_cast(getInputShapeAtPort(REDUCE_DATA).getRank()); + } } if (raw_axes.size() <= 1) { @@ -3662,8 +3739,9 @@ void Reduce::setJITBeyond5D() { std::vector Reduce::update_src_dims() { std::vector reduce_axes = raw_axes; - if (reduce_axes.size() < 1) + if (reduce_axes.size() < 1) { return reduce_axes; + } size_t axis_dim = 1; size_t outer_dim = 1; diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index 3283f7a43253ab..c4dd73a07290cc 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -31,8 +31,9 @@ Reference::Reference(const std::shared_ptr& op, const GraphContext::CP void Reference::getSupportedDescriptors() {} void Reference::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } std::vector inputConfigurators; inputConfigurators.reserve(inputShapes.size()); diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp index 10fd3ef2bb77f5..0cda931c137ba3 100644 --- a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp @@ -49,8 +49,9 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_ exp_injector.reset( new jit_uni_eltwise_injector(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f, data_type::f32)); - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core)) { uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa)); + } this->preamble(); @@ -101,8 +102,9 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_ this->postamble(); - if 
(uni_vcvtneps2bf16) + if (uni_vcvtneps2bf16) { uni_vcvtneps2bf16->emit_data(); + } exp_injector->prepare_table(); @@ -265,8 +267,9 @@ RegionYolo::RegionYolo(const std::shared_ptr& op, const GraphContext:: OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (op->get_input_size() != 1 || op->get_output_size() != 1) + if (op->get_input_size() != 1 || op->get_output_size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } const auto regionYolo = ov::as_type_ptr(op); classes = regionYolo->get_num_classes(); @@ -278,8 +281,9 @@ RegionYolo::RegionYolo(const std::shared_ptr& op, const GraphContext:: } void RegionYolo::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } input_prec = getOriginalInputPrecisionAtPort(0); output_prec = getOriginalOutputPrecisionAtPort(0); @@ -334,8 +338,9 @@ void RegionYolo::createPrimitive() { block_size = 4; } - if (logistic_kernel) + if (logistic_kernel) { logistic_kernel->create_ker(); + } #endif softmax_kernel = std::make_shared(input_prec, output_prec); } @@ -344,14 +349,16 @@ inline float RegionYolo::logistic_scalar(float src) { U aux2; aux2.as_float_value = src; int sign = aux2.as_int_value >> 31; - if (sign == 0) + if (sign == 0) { src *= -1; + } src = std::exp(src); src = src / (src + 1); - if (sign == 0) + if (sign == 0) { src = 1 - src; + } return src; } @@ -411,11 +418,12 @@ void RegionYolo::execute(const dnnl::stream& strm) { output_size = B * IH * IW * mask_size * (classes + coords + 1); } - if (output_size != getDstMemoryAtPort(0)->getShape().getElementsCount()) + if (output_size != getDstMemoryAtPort(0)->getShape().getElementsCount()) { OPENVINO_THROW("Incorrect layer configuration or output dimensions. 
", output_size, " != ", getDstMemoryAtPort(0)->getShape().getElementsCount()); + } size_t inputs_size = IH * IW * num_ * (classes + coords + 1); size_t total_size = 2 * IH * IW; diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index bde3481da6fc47..73fb1b0509f01f 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -53,15 +53,18 @@ Reorder::Reorder(const MemoryDesc& input, } void Reorder::getSupportedDescriptors() { - if (getParentEdges().size() != 1) + if (getParentEdges().size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input edges."); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } } void Reorder::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto parent = getParentEdgeAt(0)->getParent(); auto child = getChildEdgeAt(0)->getChild(); @@ -96,9 +99,10 @@ void Reorder::initSupportedPrimitiveDescriptors() { shapeInference = std::make_shared(); } - if (isDynamic && - (config.inConfs[0].getMemDesc()->getShape().getRank() != config.outConfs[0].getMemDesc()->getShape().getRank())) + if (isDynamic && (config.inConfs[0].getMemDesc()->getShape().getRank() != + config.outConfs[0].getMemDesc()->getShape().getRank())) { THROW_CPU_NODE_ERR("doesn't support case when input and output shapes have different rank and dynamic."); + } if (!isOptimized) { const auto& inShape = getInputShapeAtPort(0); if (one_of(inShape.getRank(), 4u, 5u) && config.inConfs[0].getMemDesc()->hasLayoutType(LayoutType::nspc) && @@ -122,8 +126,9 @@ void Reorder::initSupportedPrimitiveDescriptors() { void Reorder::createPrimitive() { if (shapesDefined()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -139,21 +144,24 @@ void 
Reorder::prepareReorderAsTranspose(const MemoryDescPtr& parentDesc, const M const auto rank = lhs.getShape().getRank(); if (lhs.hasLayoutType(LayoutType::ncsp) && rhs.hasLayoutType(LayoutType::nspc)) { - if (rank == 4) + if (rank == 4) { return {{0, 2, 3, 1}, {in[0], in[2], in[3], in[1]}}; - else + } else { return {{0, 2, 1}, {in[0], in[2], in[1]}}; + } } else if (lhs.hasLayoutType(LayoutType::nspc) && rhs.hasLayoutType(LayoutType::ncsp)) { - if (rank == 4) + if (rank == 4) { return {{0, 3, 1, 2}, {in[0], in[3], in[1], in[2]}}; - else + } else { return {{0, 2, 1}, {in[0], in[2], in[1]}}; + } } else { - if (rank == 4) + if (rank == 4) { return {{0, 1, 2, 3}, in}; - else + } else { return {{0, 1, 2}, in}; + } } }; @@ -184,17 +192,21 @@ void Reorder::prepareReorderAsTranspose(const MemoryDescPtr& parentDesc, const M } void Reorder::prepareParams() { - if (isOptimized) + if (isOptimized) { return; + } auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined destination memory object."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory object."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); + } auto isSupportedDesc = [](const MemoryDesc& desc) { if (!desc.isDefined()) { @@ -231,11 +243,13 @@ void Reorder::prepareParams() { const auto& dstStrides = childDesc->as()->getStrides(); const auto& dstOrder = childDesc->as()->getOrder(); const size_t channelDim = 1; - if (dstStrides.back() != 1) + if (dstStrides.back() != 1) { return false; + } for (int i = inDims.size() - 1; i > 0; i--) { - if (dstStrides[i - 1] != dstStrides[i] * inDims[dstOrder[i]] && dstOrder[i] != channelDim) + if (dstStrides[i - 1] != 
dstStrides[i] * inDims[dstOrder[i]] && dstOrder[i] != channelDim) { return false; + } } return true; }; @@ -248,12 +262,15 @@ void Reorder::prepareParams() { } } if (!canUseNcsp2Nspc && !canUseNspc2Ncsp) { - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined destination memory object."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory object."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); + } createReorderPrimitive(srcMemPtr->getDescWithType(), dstMemPtr->getDescWithType()); @@ -262,8 +279,9 @@ void Reorder::prepareParams() { void Reorder::createReorderPrimitive(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc) { auto selectedPD = getSelectedPrimitiveDescriptor(); - if (!selectedPD) + if (!selectedPD) { THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); + } const auto engine = getEngine(); auto src_desc = srcDesc->getDnnlDesc(); @@ -437,8 +455,8 @@ void Reorder::execute(const dnnl::stream& strm) { std::string Reorder::getReorderArgs(const MemoryDesc& parentDesc, const MemoryDesc& childDesc) { std::string inArgs, outArgs; if (parentDesc.getPrecision() != childDesc.getPrecision()) { - inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().get_type_name()); - outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().get_type_name()); + inArgs += (inArgs.empty() ? "" : "_") + static_cast(parentDesc.getPrecision().get_type_name()); + outArgs += (outArgs.empty() ? 
"" : "_") + static_cast(childDesc.getPrecision().get_type_name()); } auto formatSrc = parentDesc.serializeFormat(); auto formatDst = childDesc.serializeFormat(); @@ -450,8 +468,9 @@ std::string Reorder::getReorderArgs(const MemoryDesc& parentDesc, const MemoryDe } void Reorder::reorderData(const IMemory& input, const IMemory& output, const MultiCachePtr& cache) { - if (!input.getDesc().isDefined() || !output.getDesc().isDefined()) + if (!input.getDesc().isDefined() || !output.getDesc().isDefined()) { OPENVINO_THROW("Can't reorder data with dynamic shapes"); + } if (input.getShape().hasZeroDims() || output.getShape().hasZeroDims()) { return; diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp index 2c098d83d97215..26aa63f3d412f7 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp @@ -33,19 +33,22 @@ ReorgYolo::ReorgYolo(const std::shared_ptr& op, const GraphContext::CP OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (getOriginalInputsNumber() != 1 || getOriginalOutputsNumber() != 1) + if (getOriginalInputsNumber() != 1 || getOriginalOutputsNumber() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } const auto reorgYolo = ov::as_type_ptr(op); const auto strides = reorgYolo->get_strides(); - if (strides.empty()) + if (strides.empty()) { THROW_CPU_NODE_ERR("has empty strides"); + } stride = strides[0]; } void ReorgYolo::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}}, {{LayoutType::ncsp, ov::element::f32}}, diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index c79f430aac4bd6..e375bb6aadd499 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -47,8 +47,9 @@ 
Reshape::Reshape(const std::shared_ptr& op, const GraphContext::CPtr& if (ov::as_type_ptr(op)) { checkSecondInput(op, "Reshape"); } else if (ov::as_type_ptr(op)) { - if (op->get_input_size() == 1) + if (op->get_input_size() == 1) { OPENVINO_THROW("CPU plug-in doesn't support Squeeze node with inputs num equal 1"); + } checkSecondInput(op, "Squeeze"); } else if (ov::as_type_ptr(op)) { checkSecondInput(op, "Unsqueeze"); @@ -79,15 +80,18 @@ bool Reshape::needShapeInfer() const { } void Reshape::getSupportedDescriptors() { - if (getParentEdges().size() != 1 && getParentEdges().size() != 2) + if (getParentEdges().size() != 1 && getParentEdges().size() != 2) { OPENVINO_THROW("Incorrect number of input edges for layer ", getName()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { OPENVINO_THROW("Incorrect number of output edges for layer ", getName()); + } } void Reshape::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inPrec = getOriginalInputPrecisionAtPort(0); ov::element::Type outPrec = getOriginalOutputPrecisionAtPort(0); @@ -95,14 +99,16 @@ void Reshape::initSupportedPrimitiveDescriptors() { // Current reshape implementation is simple memory reinterpret, // same precision on input and output is required - if (inPrec != outPrec) + if (inPrec != outPrec) { inPrec = outPrec; + } bool canBeInPlace = true; // CVS-81059 : disable inPlace in following case since it won't be satisfied by framework - if (!isConstant() && getParentEdgeAt(0)->getParent()->isConstant()) + if (!isConstant() && getParentEdgeAt(0)->getParent()->isConstant()) { canBeInPlace = false; + } NodeConfig config; config.inConfs.resize(getParentEdges().size()); diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index de8e0319b8d525..e5671234628725 100644 --- 
a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -36,41 +36,50 @@ ReverseSequence::ReverseSequence(const std::shared_ptr& op, const Grap } const auto revSeq = ov::as_type_ptr(op); - if (revSeq == nullptr) + if (revSeq == nullptr) { THROW_CPU_NODE_ERR("is not an instance of ReverseSequence from opset1."); + } - if (inputShapes.size() != 2 || outputShapes.size() != 1) + if (inputShapes.size() != 2 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } const auto dataRank = getInputShapeAtPort(REVERSESEQUENCE_DATA).getRank(); - if (dataRank < 2) + if (dataRank < 2) { THROW_CPU_NODE_ERR("'data' rank should be greater than or equal to 2"); + } - if (getInputShapeAtPort(REVERSESEQUENCE_LENGTHS).getRank() != 1) + if (getInputShapeAtPort(REVERSESEQUENCE_LENGTHS).getRank() != 1) { THROW_CPU_NODE_ERR("'seq_lengths' should be 1D tensor"); + } - if (dataRank != getOutputShapeAtPort(0).getRank()) + if (dataRank != getOutputShapeAtPort(0).getRank()) { THROW_CPU_NODE_ERR("has input/output rank mismatch"); + } seq_axis = revSeq->get_sequence_axis(); - if (seq_axis < 0 || seq_axis >= static_cast(dataRank)) + if (seq_axis < 0 || seq_axis >= static_cast(dataRank)) { THROW_CPU_NODE_ERR("has incorrect 'seq_axis' parameters dimensions and axis number!"); + } batch_axis = revSeq->get_batch_axis(); - if (batch_axis < 0 || batch_axis >= static_cast(dataRank)) + if (batch_axis < 0 || batch_axis >= static_cast(dataRank)) { THROW_CPU_NODE_ERR("has incorrect 'batch_axis' parameters dimensions and axis number!"); + } } void ReverseSequence::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } lengthsPrecision = getOriginalInputPrecisionAtPort(REVERSESEQUENCE_LENGTHS); - if (lengthsPrecision != ov::element::i32 && lengthsPrecision != ov::element::f32) + if (lengthsPrecision != ov::element::i32 
&& lengthsPrecision != ov::element::f32) { lengthsPrecision = ov::element::i32; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::f32}, {LayoutType::ncsp, lengthsPrecision}}, {{LayoutType::ncsp, ov::element::f32}}, @@ -82,14 +91,18 @@ void ReverseSequence::prepareParams() { const auto& seqLengthsMemPtr = getSrcMemoryAtPort(REVERSESEQUENCE_LENGTHS); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isDefined()) + if (!dataMemPtr || !dataMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'data'"); - if (!seqLengthsMemPtr || !seqLengthsMemPtr->isDefined()) + } + if (!seqLengthsMemPtr || !seqLengthsMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'seq_lengths'"); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined output memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor"); + } const VectorDims& dataDims = dataMemPtr->getStaticDims(); const VectorDims& seqLengthsDims = seqLengthsMemPtr->getStaticDims(); @@ -110,12 +123,14 @@ ReverseSequence::ReverseSequenceExecutor::ReverseSequenceExecutor(const VectorDi : batchAxis{batchAxis}, seqAxis{seqAxis} { for (size_t i = 0; i < dataDims.size(); ++i) { - if (dataDims[i] != dstDims[i]) + if (dataDims[i] != dstDims[i]) { OPENVINO_THROW("Input/output tensors dimensions mismatch"); + } } - if (seqLengthsDims[0] != dataDims[batchAxis]) + if (seqLengthsDims[0] != dataDims[batchAxis]) { OPENVINO_THROW("'seq_lengths' dimension mismatch"); + } srcStrides.resize(dataDims.size()); srcStrides[srcStrides.size() - 1] = 1; @@ -162,29 +177,33 @@ void ReverseSequence::ReverseSequenceExecutor::exec(const MemoryPtr& dataMemPtr, dstData[iwork] = srcData[srcIdx]; for (int j = srcDims.size() - 1; j >= 0; --j) { counters[j] = (counters[j] + 1) % srcDims[j]; 
- if (counters[j] != 0) + if (counters[j] != 0) { break; + } } } }); } void ReverseSequence::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("has no compiled executor"); + } const auto precision = getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().getDesc().getPrecision(); - if (!one_of(precision, ov::element::f32, ov::element::i32)) + if (!one_of(precision, ov::element::f32, ov::element::i32)) { OPENVINO_THROW("ReverseSequence layer does not support ", precision, " precision"); + } - if (precision == ov::element::f32) + if (precision == ov::element::f32) { execPtr->exec(getSrcMemoryAtPort(REVERSESEQUENCE_DATA), getSrcMemoryAtPort(REVERSESEQUENCE_LENGTHS), getDstMemoryAtPort(0)); - else + } else { execPtr->exec(getSrcMemoryAtPort(REVERSESEQUENCE_DATA), getSrcMemoryAtPort(REVERSESEQUENCE_LENGTHS), getDstMemoryAtPort(0)); + } } bool ReverseSequence::created() const { diff --git a/src/plugins/intel_cpu/src/nodes/rms_norm.cpp b/src/plugins/intel_cpu/src/nodes/rms_norm.cpp index 85d09ae093ce10..20b7d2694ffdca 100644 --- a/src/plugins/intel_cpu/src/nodes/rms_norm.cpp +++ b/src/plugins/intel_cpu/src/nodes/rms_norm.cpp @@ -67,8 +67,9 @@ static std::shared_ptr createJitKernel(const kernel::jit_ res = std::make_shared>(param); } - if (res) + if (res) { res->create_kernel(); + } return res; } @@ -128,11 +129,13 @@ RMSNorm::RMSNorm(const std::shared_ptr& op, const GraphContext::CPtr& } void RMSNorm::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto precision = getOriginalInputPrecisionAtPort(0); - if (!one_of(precision, ov::element::f32, ov::element::bf16, ov::element::f16)) + if (!one_of(precision, ov::element::f32, ov::element::bf16, ov::element::f16)) { precision = ov::element::f32; + } impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_core)) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp 
b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 8140624edd3c37..9668f18f6c0533 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -58,19 +58,21 @@ static dnnl::algorithm ie2dnnl(const std::shared_ptr& op) { ov::op::v5::GRUSequence::get_type_info_static())) { auto gruCellOp = ov::as_type_ptr(op); auto gruSeqOp = ov::as_type_ptr(op); - if ((gruCellOp && gruCellOp->get_linear_before_reset()) || (gruSeqOp && gruSeqOp->get_linear_before_reset())) + if ((gruCellOp && gruCellOp->get_linear_before_reset()) || (gruSeqOp && gruSeqOp->get_linear_before_reset())) { return dnnl::algorithm::lbr_gru; - else + } else { return dnnl::algorithm::vanilla_gru; + } } else if (one_of(op->get_type_info(), ov::op::internal::AUGRUCell::get_type_info_static(), ov::op::internal::AUGRUSequence::get_type_info_static())) { auto gruCellOp = ov::as_type_ptr(op); auto gruSeqOp = ov::as_type_ptr(op); - if ((gruCellOp && gruCellOp->get_linear_before_reset()) || (gruSeqOp && gruSeqOp->get_linear_before_reset())) + if ((gruCellOp && gruCellOp->get_linear_before_reset()) || (gruSeqOp && gruSeqOp->get_linear_before_reset())) { return dnnl::algorithm::lbr_augru; - else + } else { return dnnl::algorithm::vanilla_augru; + } } else if (one_of(op->get_type_info(), ov::op::v0::LSTMCell::get_type_info_static(), ov::op::v4::LSTMCell::get_type_info_static(), @@ -158,12 +160,14 @@ size_t RNNKey::hash() const { size_t seed = 0lu; for (auto& desc : inDataDescs) { - if (desc != nullptr) + if (desc != nullptr) { seed = hash_combine(seed, get_md_hash(*desc->getDnnlDesc().get())); + } } for (auto& desc : outDataDescs) { - if (desc != nullptr) + if (desc != nullptr) { seed = hash_combine(seed, get_md_hash(*desc->getDnnlDesc().get())); + } } for (auto& desc : wDescs) { seed = hash_combine(seed, get_md_hash(*desc.get())); @@ -184,18 +188,21 @@ bool RNNKey::operator==(const RNNKey& rhs) const { for (size_t i = 0lu; i < inDataDescs.size(); i++) { if (inDataDescs[i] != 
rhs.inDataDescs[i] && (inDataDescs[i] == nullptr || rhs.inDataDescs[i] == nullptr || - inDataDescs[i]->getDnnlDesc() != rhs.inDataDescs[i]->getDnnlDesc())) + inDataDescs[i]->getDnnlDesc() != rhs.inDataDescs[i]->getDnnlDesc())) { return false; + } } for (size_t i = 0lu; i < outDataDescs.size(); i++) { if (outDataDescs[i] != rhs.outDataDescs[i] && (outDataDescs[i] == nullptr || rhs.outDataDescs[i] == nullptr || - outDataDescs[i]->getDnnlDesc() != rhs.outDataDescs[i]->getDnnlDesc())) + outDataDescs[i]->getDnnlDesc() != rhs.outDataDescs[i]->getDnnlDesc())) { return false; + } } for (size_t i = 0lu; i < wDescs.size(); i++) { - if (wDescs[i] != rhs.wDescs[i]) + if (wDescs[i] != rhs.wDescs[i]) { return false; + } } return true; @@ -319,8 +326,9 @@ bool RNN::isSupportedOperation(const std::shared_ptr& op, std::s // WA: dynamic shapes make impossible to check seq_len due to shapeOf subgraphs // but the sequence is still supported in CPU and doesn't need to be decomposed - if (data_pshape.is_dynamic()) + if (data_pshape.is_dynamic()) { return true; + } const int64_t maxSeqLenDimIdx = 1; @@ -483,31 +491,37 @@ RNN::RNN(const std::shared_ptr& op, const GraphContext::CPtr& context) } auto rnnCellBase = ov::as_type_ptr(op); - if (!rnnCellBase) + if (!rnnCellBase) { THROW_CPU_NODE_ERR("does not have original layer for RNNCell."); + } cell_type = ie2dnnl(op); - if (!rnnCellBase->get_activations().empty()) + if (!rnnCellBase->get_activations().empty()) { cell_act = ie2dnnl(rnnCellBase->get_activations()[0]); // Works only for RNN with one gate + } G = gatesCount(cell_type); Gb = (cell_type != dnnl::algorithm::lbr_gru) ? 
G : G + 1; S = statesCount(cell_type); SC = rnnCellBase->get_hidden_size(); N = {getInputShapeAtPort(0).getMinDims()[0], getInputShapeAtPort(0).getMaxDims()[0]}; - if (!is_cell) + if (!is_cell) { N_SEQ = {getInputShapeAtPort(sIdx).getMinDims()[0], getInputShapeAtPort(sIdx).getMaxDims()[0]}; + } const auto& rtInfo = op->get_rt_info(); - if (rtInfo.count("inputScale")) + if (rtInfo.count("inputScale")) { inputScale = rtInfo.at("inputScale").as(); + } - if (rtInfo.count("inputShift")) + if (rtInfo.count("inputShift")) { inputShift = rtInfo.at("inputShift").as(); + } - if (rtInfo.count("weightsScales")) + if (rtInfo.count("weightsScales")) { weightsScales = rtInfo.at("weightsScales").as>(); + } if (is_cell) { initCell(); @@ -530,68 +544,81 @@ bool RNN::created() const { void RNN::configurePortDataTypes() { inDataTypes[xIdx] = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(0)); inDataTypes[hIdx] = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(1)); - if (haveCellState(cell_type)) + if (haveCellState(cell_type)) { inDataTypes[cIdx] = memory::data_type::f32; // @todo bf16 is also allowed, should be tried out - if (!is_cell) + } + if (!is_cell) { inDataTypes[sIdx] = memory::data_type::s32; + } inDataTypes[wIdx] = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(wIdx)); inDataTypes[rIdx] = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(rIdx)); inDataTypes[bIdx] = memory::data_type::f32; // @todo bf16 is also allowed, should be tried out - if (haveAttention(cell_type)) + if (haveAttention(cell_type)) { inDataTypes[aIdx] = DnnlExtensionUtils::ElementTypeToDataType(getOriginalInputPrecisionAtPort(aIdx)); + } - if (!is_cell) + if (!is_cell) { outDataTypes[yIdx] = DnnlExtensionUtils::ElementTypeToDataType(getOriginalOutputPrecisionAtPort(0)); + } outDataTypes[hoIdx] = inDataTypes[hIdx]; // required by oneDNN. 
Output hidden state is a input hidden state for the next iteration - if (haveCellState(cell_type)) + if (haveCellState(cell_type)) { outDataTypes[coIdx] = inDataTypes[cIdx]; // required by oneDNN. + } - if (one_of(memory::data_type::bf16, inDataTypes[xIdx], inDataTypes[hIdx])) + if (one_of(memory::data_type::bf16, inDataTypes[xIdx], inDataTypes[hIdx])) { inDataTypes[xIdx] = outDataTypes[yIdx] = outDataTypes[hoIdx] = inDataTypes[hIdx] = memory::data_type::bf16; // required by oneDNN. + } - if (one_of(memory::data_type::f16, inDataTypes[xIdx], inDataTypes[hIdx])) + if (one_of(memory::data_type::f16, inDataTypes[xIdx], inDataTypes[hIdx])) { // onednn doesn't have fp16 instance inDataTypes[xIdx] = outDataTypes[yIdx] = outDataTypes[hoIdx] = inDataTypes[hIdx] = memory::data_type::f32; // required by oneDNN. + } // OneDNN unsupported fp16 precision for this layer - if (cell_type == dnnl::algorithm::vanilla_augru && inDataTypes[aIdx] == memory::data_type::f16) + if (cell_type == dnnl::algorithm::vanilla_augru && inDataTypes[aIdx] == memory::data_type::f16) { inDataTypes[aIdx] = memory::data_type::f32; + } if (outDataTypes[yIdx] == memory::data_type::bf16 && - one_of(inDataTypes[xIdx], memory::data_type::s8, memory::data_type::u8)) + one_of(inDataTypes[xIdx], memory::data_type::s8, memory::data_type::u8)) { outDataTypes[yIdx] = memory::data_type::f32; // oneDNN does not support bf16 output precision for quantized rnn primitive yet + } } void RNN::getSupportedDescriptors() { configurePortDataTypes(); - if (is_cell) + if (is_cell) { fillCellDesc(); - else + } else { fillSequenceDesc(); + } } void RNN::initCell() { - if (getInputShapeAtPort(0).getRank() != 2lu || getInputShapeAtPort(1).getRank() != 2lu) + if (getInputShapeAtPort(0).getRank() != 2lu || getInputShapeAtPort(1).getRank() != 2lu) { THROW_CPU_NODE_ERR("has incorrect input ranks. 
Data rank: ", getInputShapeAtPort(0).getRank(), "; Hidden state rank: ", getInputShapeAtPort(1).getRank()); - if (is_augru && getInputShapeAtPort(5).getRank() != 2lu) + } + if (is_augru && getInputShapeAtPort(5).getRank() != 2lu) { THROW_CPU_NODE_ERR("has incorrect input ranks. Attention rank: ", getInputShapeAtPort(2).getRank()); + } T = {1, 1}; - if (cell_type == algorithm::vanilla_lstm) + if (cell_type == algorithm::vanilla_lstm) { DC = getInputShapeAtPort(3).getDims()[1]; - else + } else { DC = getInputShapeAtPort(2).getDims()[1]; + } if (N.isStatic()) { // Expected shapes. @@ -611,11 +638,12 @@ void RNN::initCell() { if (S == 2) { if ((getInputShapeAtPort(2).isStatic() && getInputShapeAtPort(2) != shapeS) || - (getOutputShapeAtPort(1).isStatic() && getOutputShapeAtPort(1) != shapeS)) + (getOutputShapeAtPort(1).isStatic() && getOutputShapeAtPort(1) != shapeS)) { THROW_CPU_NODE_ERR("has incorrect input/output shapes. Cell state input: ", getInputShapeAtPort(2).toString(), "; Cell state output: ", getOutputShapeAtPort(1).toString()); + } } if (is_augru) { @@ -713,22 +741,26 @@ void RNN::initSequence() { const auto& inDataShape = getInputShapeAtPort(0); const auto& outDataShape = getOutputShapeAtPort(0); - if (inDataShape.getRank() != 3lu || outDataShape.getRank() != 4lu) + if (inDataShape.getRank() != 3lu || outDataShape.getRank() != 4lu) { THROW_CPU_NODE_ERR("has incorrect input/output shapes. 
Input data shape: ", inDataShape.toString(), " Output shape: ", outDataShape.toString()); + } - if (!one_of(getOriginalInputsNumber(), 6u, 7u)) + if (!one_of(getOriginalInputsNumber(), 6u, 7u)) { THROW_CPU_NODE_ERR("has incorrect number of input ports: ", getOriginalInputsNumber()); - if (!one_of(getOriginalOutputsNumber(), 2u, 3u)) + } + if (!one_of(getOriginalOutputsNumber(), 2u, 3u)) { THROW_CPU_NODE_ERR("has incorrect number of output ports: ", getOriginalOutputsNumber()); + } T = {inDataShape.getMinDims()[1], inDataShape.getMaxDims()[1]}; - if (cell_type == algorithm::vanilla_lstm) + if (cell_type == algorithm::vanilla_lstm) { DC = getInputShapeAtPort(4).getDims()[2]; - else + } else { DC = getInputShapeAtPort(3).getDims()[2]; + } // layer input plus states if (haveAttention(cell_type)) { @@ -1294,12 +1326,14 @@ Node::AttrPtr RNN::initPrimitiveAttr() { void RNN::prepareParams() { for (size_t i = 0; i < wIdx; i++) { auto memPtr = getSrcMemoryAtPort(i); - if (!memPtr || !memPtr->isDefined()) + if (!memPtr || !memPtr->isDefined()) { THROW_CPU_NODE_ERR("has uninitialized memory at port ", i); + } } if ((is_cell && DC != getParentEdgeAt(0)->getMemory().getDesc().getShape().getStaticDims()[1]) || - (!is_cell && DC != getParentEdgeAt(0)->getMemory().getDesc().getShape().getStaticDims()[2])) + (!is_cell && DC != getParentEdgeAt(0)->getMemory().getDesc().getShape().getStaticDims()[2])) { THROW_CPU_NODE_ERR("has incorrect input size value in the first input."); + } auto dataMemPtr = getSrcMemoryAtPort(0); const size_t B = dataMemPtr->getShape().getStaticDims()[0]; @@ -1388,8 +1422,9 @@ std::shared_ptr RNN::getDstMemDesc(const dnnl::primitive_desc& prim_ } void RNN::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("does not have initialized primitive to execute."); + } const auto src_data_mem = getSrcMemoryAtPort(0); const auto dst_data_mem = getDstMemoryAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp 
b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 8b6d388551f034..cd80985943ea32 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -488,10 +488,11 @@ struct jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji load_idx(reg_buf, vmm_buf, v_step); - if (jcp_.data_prc == ov::element::f32) + if (jcp_.data_prc == ov::element::f32) { gather_f32(vmm_src, reg_src, vmm_buf); - else if (jcp_.data_prc == ov::element::bf16) + } else if (jcp_.data_prc == ov::element::bf16) { gather_bf16_to_f32_zmm(vmm_src, reg_src, vmm_buf); + } uni_vmovups(vmm_weights, ptr[reg_weights]); @@ -527,8 +528,9 @@ struct jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji } L(main_loop_end_label); - if (jcp_.alg == Algorithm::ROIAlignAvg) + if (jcp_.alg == Algorithm::ROIAlignAvg) { uni_vpxor(vmm_dst_tail, vmm_dst_tail, vmm_dst_tail); + } lane = 1; L(tail_loop_label); @@ -538,10 +540,11 @@ struct jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji load_idx(reg_buf, vmm_buf, x_step); - if (jcp_.data_prc == ov::element::f32) + if (jcp_.data_prc == ov::element::f32) { gather_f32_xmm(xmm_src, reg_src, xmm_buf); - else if (jcp_.data_prc == ov::element::bf16) + } else if (jcp_.data_prc == ov::element::bf16) { gather_bf16_to_f32_xmm(xmm_src, reg_src, xmm_buf); + } uni_vmovups(xmm_weights, ptr[reg_weights]); if (jcp_.alg == Algorithm::ROIAlignAvg) { @@ -568,10 +571,11 @@ struct jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji } // xmm_dst[0] of f32 is the dst value - if (jcp_.data_prc == ov::element::f32) + if (jcp_.data_prc == ov::element::f32) { uni_vpextrd(ptr[reg_dst], xmm_dst, 0); - else if (jcp_.data_prc == ov::element::bf16) + } else if (jcp_.data_prc == ov::element::bf16) { uni_vpextrw(ptr[reg_dst], xmm_dst, 1); + } } // gather f32 data from reg_src with vmm_idx(data_size) to vmm_src with f32 precision @@ -605,8 +609,9 @@ struct 
jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji // gather bf16 data from reg_src with vmm_idx(data_size) to vmm_src with f32 precision // bf16 is needed from avx512_core inline void gather_bf16_to_f32_zmm(Vmm vmm_src, const reg64_t reg_src, const Vmm vmm_idx) { - if (!std::is_same::value) + if (!std::is_same::value) { OPENVINO_THROW("bf16 is only supported from avx512_core platform for ROIAlign node."); + } sub(rsp, v_len); uni_vmovdqu(ptr[rsp], vmm_idx); for (int i = 0; i < v_step; i++) { @@ -723,10 +728,12 @@ ROIAlign::ROIAlign(const std::shared_ptr& op, const GraphContext::CPtr } void ROIAlign::getSupportedDescriptors() { - if (getParentEdges().size() != 3) + if (getParentEdges().size() != 3) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } if (getInputShapeAtPort(0).getRank() != 4) { THROW_CPU_NODE_ERR("doesn't support 0th input with rank: ", getInputShapeAtPort(0).getRank()); @@ -775,14 +782,16 @@ void ROIAlign::createJitKernel(const ov::element::Type& dataPrec, const ROIAlign } else if (mayiuse(cpu::x64::sse41)) { roi_align_kernel.reset(new jit_uni_roi_align_kernel_f32(jcp)); } - if (roi_align_kernel) + if (roi_align_kernel) { roi_align_kernel->create_ker(); + } #endif } void ROIAlign::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inputPrec0 = getOriginalInputPrecisionAtPort(0); ov::element::Type outputPrec = getOriginalOutputPrecisionAtPort(0); @@ -831,10 +840,12 @@ void ROIAlign::initSupportedPrimitiveDescriptors() { void ROIAlign::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr) + if (!srcMemPtr) { THROW_CPU_NODE_ERR("has null input memory"); - if 
(!dstMemPtr) + } + if (!dstMemPtr) { THROW_CPU_NODE_ERR("has null destination memory"); + } if (!roi_align_kernel) { ROIAlignLayoutType selectedLayout = ROIAlignLayoutType::nspc; @@ -867,8 +878,9 @@ struct ROIAlign::ROIAlignExecute { void ROIAlign::execute(const dnnl::stream& strm) { auto inputPrec = getParentEdgeAt(0)->getMemory().getDataType(); auto outputPrec = getChildEdgeAt(0)->getMemory().getDataType(); - if (!((inputPrec == dnnl_bf16 && outputPrec == dnnl_bf16) || (inputPrec == dnnl_f32 && outputPrec == dnnl_f32))) + if (!((inputPrec == dnnl_bf16 && outputPrec == dnnl_bf16) || (inputPrec == dnnl_f32 && outputPrec == dnnl_f32))) { OPENVINO_THROW("ROIAlign doesn't support demanded precisions"); + } ROIAlignContext ctx = {*this}; @@ -925,10 +937,11 @@ void ROIAlign::executeSpecified() { std::vector> weightsTbl(realRois); std::vector> srcAddressListTbl; std::vector> srcIndexTbl; - if (!isPlainFmt) + if (!isPlainFmt) { srcAddressListTbl.resize(realRois); - else + } else { srcIndexTbl.resize(realRois); + } bool aligned = false; float offset_src = 0; @@ -987,10 +1000,11 @@ void ROIAlign::executeSpecified() { // prepare arrays for sampling points and weights size_t paramsSize = BLIParamsNum * numSamplesInBin * binCount; weightsTbl[n] = std::vector(paramsSize, 0.f); - if (!isPlainFmt) + if (!isPlainFmt) { srcAddressListTbl[n] = std::vector(paramsSize, 0); - else + } else { srcIndexTbl[n] = std::vector(paramsSize, 0); + } size_t batchSrcOffset = roiBatchInd * batchInputStride; int idxIter = 0; @@ -1012,14 +1026,17 @@ void ROIAlign::executeSpecified() { // For this sample we save 4 index of (0,0) and 4 weight of 0 if (!isPlainFmt) { auto startPoint = reinterpret_cast(&srcData[batchSrcOffset]); - for (int i = 0; i < BLIParamsNum; i++) + for (int i = 0; i < BLIParamsNum; i++) { srcAddressListTbl[n][idxIter + i] = startPoint; + } } else { - for (int i = 0; i < BLIParamsNum; i++) + for (int i = 0; i < BLIParamsNum; i++) { srcIndexTbl[n][idxIter + i] = 0; + } } - for (int i 
= 0; i < BLIParamsNum; i++) + for (int i = 0; i < BLIParamsNum; i++) { weightsTbl[n][idxIter + i] = 0.f; + } idxIter += BLIParamsNum; continue; } diff --git a/src/plugins/intel_cpu/src/nodes/roi_align_rotated.cpp b/src/plugins/intel_cpu/src/nodes/roi_align_rotated.cpp index d9566c679ce438..700bbd6fbdff24 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align_rotated.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align_rotated.cpp @@ -28,8 +28,9 @@ void ROIAlignRotated::getSupportedDescriptors() { } void ROIAlignRotated::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inputPrec0 = getOriginalInputPrecisionAtPort(0); ov::element::Type outputPrec = getOriginalOutputPrecisionAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index db00dfe607c1c4..b3ce626b60a60a 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -310,19 +310,21 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi cmp(reg_bin_area, 0); je(empty_roi_label, T_NEAR); - if (jpp_.alg == Algorithm::ROIPoolingMax) + if (jpp_.alg == Algorithm::ROIPoolingMax) { roi_pool_max(c_blocks); - else + } else { roi_pool_bilinear(c_blocks); + } if (isa == cpu::x64::sse41) { add(reg_input, 4 * jpp_.src_prc.size()); add(reg_output, 4 * jpp_.dst_prc.size()); - if (jpp_.alg == Algorithm::ROIPoolingMax) + if (jpp_.alg == Algorithm::ROIPoolingMax) { roi_pool_max(c_blocks); - else + } else { roi_pool_bilinear(c_blocks); + } } jmp(exit_label, T_NEAR); @@ -421,10 +423,12 @@ ROIPooling::ROIPooling(const std::shared_ptr& op, const GraphContext:: } void ROIPooling::getSupportedDescriptors() { - if (getParentEdges().size() != 2) + if (getParentEdges().size() != 2) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if 
(getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } if (getInputShapeAtPort(0).getRank() != 4) { THROW_CPU_NODE_ERR("doesn't support 0th input with rank: ", getInputShapeAtPort(0).getRank()); @@ -445,8 +449,9 @@ void ROIPooling::getSupportedDescriptors() { } void ROIPooling::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto format = mayiuse(avx512_core) ? LayoutType::nCsp16c : LayoutType::nCsp8c; impl_desc_type impl_type; @@ -463,8 +468,9 @@ void ROIPooling::initSupportedPrimitiveDescriptors() { refParams.src_prc = getOriginalInputPrecisionAtPort(0); if (!mayiuse(avx512_core)) { - if (refParams.src_prc == ov::element::bf16) + if (refParams.src_prc == ov::element::bf16) { refParams.src_prc = ov::element::f32; + } } if (impl_type != impl_desc_type::ref && refParams.src_prc == ov::element::f16) { @@ -478,8 +484,9 @@ void ROIPooling::initSupportedPrimitiveDescriptors() { void ROIPooling::createPrimitive() { auto selectedPD = getSelectedPrimitiveDescriptor(); - if (!selectedPD) + if (!selectedPD) { OPENVINO_THROW("CPU ROI Pooling node with name '", getName(), "' doesn't have primitive descriptors."); + } refParams.c_block = mayiuse(cpu::x64::avx512_core) ? 
16 : 8; ; @@ -491,8 +498,9 @@ void ROIPooling::createPrimitive() { refParams.dst_prc = config.outConfs[0].getMemDesc()->getPrecision(); if (inputShapesDefined()) { - if (needPrepareParams() && isExecutable()) + if (needPrepareParams() && isExecutable()) { prepareParams(); + } updateLastInputDims(); } } @@ -516,14 +524,18 @@ void ROIPooling::prepareParams() { const auto& srcMemPtr0 = getSrcMemoryAtPort(0); const auto& srcMemPtr1 = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr0 || !srcMemPtr0->isDefined()) + if (!srcMemPtr0 || !srcMemPtr0->isDefined()) { OPENVINO_THROW("Input memory is undefined."); - if (!srcMemPtr1 || !srcMemPtr1->isDefined()) + } + if (!srcMemPtr1 || !srcMemPtr1->isDefined()) { OPENVINO_THROW("Input memory is undefined."); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { OPENVINO_THROW("Destination is undefined."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } const auto& inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const auto& outDims = getChildEdgeAt(0)->getMemory().getStaticDims(); @@ -560,14 +572,16 @@ class ROIPooling::ROIPoolingJitExecutor : public ROIPooling::ROIPoolingExecutor OPENVINO_THROW("Can't create jit RoiPooling kernel"); } - if (roi_pooling_kernel) + if (roi_pooling_kernel) { roi_pooling_kernel->create_ker(); + } #endif } void exec(const IMemory& srcData, const IMemory& srcRoi, const IMemory& dst) override { - if (!roi_pooling_kernel) + if (!roi_pooling_kernel) { OPENVINO_THROW("Could not execute. 
Kernel for RoiPooling node was not compiled."); + } auto src_strides = srcData.getDescWithType()->getStrides(); auto src_roi_step = srcRoi.getDescWithType()->getStrides()[0]; @@ -670,11 +684,13 @@ class ROIPooling::ROIPoolingJitExecutor : public ROIPooling::ROIPoolingExecutor int left_x_index = static_cast(floorf(in_x)); int right_x_index = static_cast(ceilf(in_x)); - if (right_x_index > jpp.iw - 1) + if (right_x_index > jpp.iw - 1) { right_x_index = jpp.iw - 1; + } - if (bottom_y_index > jpp.ih - 1) + if (bottom_y_index > jpp.ih - 1) { bottom_y_index = jpp.ih - 1; + } arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; @@ -833,11 +849,13 @@ class ROIPooling::ROIPoolingRefExecutor : public ROIPooling::ROIPoolingExecutor int left_x_index = static_cast(floorf(in_x)); int right_x_index = static_cast(ceilf(in_x)); - if (right_x_index > jpp.iw - 1) + if (right_x_index > jpp.iw - 1) { right_x_index = jpp.iw - 1; + } - if (bottom_y_index > jpp.ih - 1) + if (bottom_y_index > jpp.ih - 1) { bottom_y_index = jpp.ih - 1; + } for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { int ch_blk_cur = cbb * cb_num + cbb_cur; @@ -966,8 +984,9 @@ template std::shared_ptr ROIPooling::ROIPoolingExecutor::makeExecutor( const jit_roi_pooling_params& jpp) { #if defined(OPENVINO_ARCH_X86_64) - if (mayiuse(cpu::x64::sse41)) + if (mayiuse(cpu::x64::sse41)) { return std::make_shared>(jpp); + } #endif return std::make_shared>(jpp); diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index 858f4750463852..10994a3be5a6dd 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -45,16 +45,18 @@ Roll::Roll(const std::shared_ptr& op, const GraphContext::CPtr& contex const auto& dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == - 
supportedPrecisionSizes.end()) + supportedPrecisionSizes.end()) { THROW_CPU_NODE_ERR("as unsupported precision: ", dataPrecision.get_type_name()); + } const auto dataRank = getInputShapeAtPort(DATA_INDEX).getRank(); if (dataRank < 1) { THROW_CPU_NODE_ERR("doesn't support 'data' input tensor with rank: ", dataRank); } - if (dataRank != getOutputShapeAtPort(0).getRank()) + if (dataRank != getOutputShapeAtPort(0).getRank()) { THROW_CPU_NODE_ERR("has input/output rank mismatch"); + } /* Axes */ const auto& axesTensorPrec = getOriginalInputPrecisionAtPort(AXES_INDEX); @@ -85,8 +87,9 @@ Roll::Roll(const std::shared_ptr& op, const GraphContext::CPtr& contex void Roll::getSupportedDescriptors() {} void Roll::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type precision = getOriginalInputPrecisionAtPort(0); @@ -102,16 +105,21 @@ void Roll::prepareParams() { const auto& axesMemPtr = getSrcMemoryAtPort(AXES_INDEX); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isDefined()) + if (!dataMemPtr || !dataMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'data'"); - if (!shiftMemPtr || !shiftMemPtr->isDefined()) + } + if (!shiftMemPtr || !shiftMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'shift'"); - if (!axesMemPtr || !axesMemPtr->isDefined()) + } + if (!axesMemPtr || !axesMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory of 'axes'"); - if (!dstMemPtr || !dstMemPtr->isDefined()) + } + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined output memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor"); + } const VectorDims& dataDims = dataMemPtr->getStaticDims(); const VectorDims& shiftDims = 
shiftMemPtr->getStaticDims(); @@ -126,8 +134,9 @@ void Roll::executeDynamicImpl(const dnnl::stream& strm) { } void Roll::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_CPU_NODE_ERR("has no compiled executor"); + } const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().getDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); @@ -167,12 +176,14 @@ Roll::RollExecutor::RollExecutor(const VectorDims& dataDims, numOfIterations{std::accumulate(dataDims.cbegin(), dataDims.cend(), 1ul, std::multiplies()) / blockSize}, axesLength{axesDims[0]} { for (size_t i = 0; i < dataDims.size(); ++i) { - if (dataDims[i] != dstDims[i]) + if (dataDims[i] != dstDims[i]) { OPENVINO_THROW("Input/output tensors dimensions mismatch"); + } } - if (shiftDims[0] != axesDims[0]) + if (shiftDims[0] != axesDims[0]) { OPENVINO_THROW("'shift' and 'axes' dimensions mismatch"); + } } template @@ -219,11 +230,13 @@ void Roll::RollExecutor::exec(const MemoryPtr& dataMemPtr, calculateShiftOffset(rightBlockStartOffset, shiftsVector[dim], strides[dim], dataDims[dim]); } - if (leftBlockSize > 0) + if (leftBlockSize > 0) { cpu_memcpy(dst + leftBlockStartOffset, data + start, leftBlockSize * elementSize); + } - if (rightBlockSize > 0) + if (rightBlockSize > 0) { cpu_memcpy(dst + rightBlockStartOffset, data + (start + leftBlockSize), rightBlockSize * elementSize); + } }); } diff --git a/src/plugins/intel_cpu/src/nodes/rope.cpp b/src/plugins/intel_cpu/src/nodes/rope.cpp index 984b35237d93ba..46e2d7155bd749 100644 --- a/src/plugins/intel_cpu/src/nodes/rope.cpp +++ b/src/plugins/intel_cpu/src/nodes/rope.cpp @@ -45,24 +45,29 @@ static std::shared_ptr createJitKernel(const jit_rotary_c bool flag = true; if (check_vec_size2) { auto vec_size = jit_rotary_kernel::vec_size; - if (param.rotary_ndims % (vec_size * 2) != 0) + if (param.rotary_ndims % (vec_size * 2) != 0) { flag = false; + } } - if (flag) + if (flag) { res = std::make_shared>(param); + } } else if 
(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) { bool flag = true; if (check_vec_size2) { auto vec_size = jit_rotary_kernel::vec_size; - if (param.rotary_ndims % (vec_size * 2) != 0) + if (param.rotary_ndims % (vec_size * 2) != 0) { flag = false; + } } - if (flag) + if (flag) { res = std::make_shared>(param); + } } - if (res) + if (res) { res->create_kernel(); + } #endif // OPENVINO_ARCH_X86_64 @@ -144,10 +149,11 @@ struct RoPE::RoPEExecutorRotateHalf : public RoPE::Executor { parallel_for3d(batch_size, head_cnt, seq_len, [&](size_t b, size_t h, size_t p) { auto cos_pos = p; if (gather) { - if (gather.m_rank == 4) + if (gather.m_rank == 4) { cos_pos = gather.at({b, h, p, 0}, true); - else + } else { cos_pos = gather.at({b, p}, true); + } } auto* src = t_src.ptr(b, h, p); auto* cos = &t_cos.at({b, h, cos_pos, 0}, true); @@ -371,8 +377,9 @@ struct RoPE::RoPEExecutorQwen : public RoPE::Executor { }; void RoPE::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto srcPrecision = getOriginalInputPrecisionAtPort(0); auto rtPrecision = srcPrecision; @@ -420,8 +427,9 @@ void RoPE::initSupportedPrimitiveDescriptors() { m_executor = std::make_shared>(m_config); rtPrecision = ov::element::f32; } - if (m_config.slice_stop - m_config.slice_start > 0 || m_config.input_trans0213) + if (m_config.slice_stop - m_config.slice_start > 0 || m_config.input_trans0213) { can_inplace = false; + } } // initialize input ports diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index e6455505e55532..0d3730008ee611 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -80,11 +80,13 @@ struct MHAKernel { float dot_product(const D* a, const D* b, int len, int stride_b = 1) { float result = 0; if (stride_b == 1) { - for (int i = 0; i < len; i++) + for (int i = 0; i < 
len; i++) { result += static_cast(a[i]) * static_cast(b[i]); + } } else { - for (int i = 0; i < len; i++) + for (int i = 0; i < len; i++) { result += static_cast(a[i]) * static_cast(b[i * stride_b]); + } } return result; } @@ -140,8 +142,9 @@ struct MHAKernel { auto kv_len = present_key.size(2); auto Hk = present_key.size(1); size_t h_each_group_len = H / Hk; - if (d_scale == 0.0f) + if (d_scale == 0.0f) { d_scale = 1.0f / sqrt(head_size); + } auto k_stride_s = present_key.stride(3); @@ -155,26 +158,30 @@ struct MHAKernel { // how many key/values can be accessed causally auto ncausal = kv_len; // no causall mask is set and it's not fused into attention_mask - if (auto_causal) + if (auto_causal) { ncausal = kv_len - q_len + m + 1; + } for (size_t n = 0; n < ncausal; n++) { auto* k = &present_key.at({b, h / h_each_group_len, n, 0}, true); attn_score[n] = dot_product(q, k, head_size, k_stride_s) * d_scale; // apply alibi tensor - if (alibi_mask) + if (alibi_mask) { attn_score[n] += alibi_mask.at({b, h, m, n}, true); + } // apply attention mask (maybe combined with causal_mask) - if (attention_mask) + if (attention_mask) { attn_score[n] += attention_mask.at({b, h, m, n}, true); + } // apply causal_mask if (causal_mask) { bool is_zero = causal_mask.at({b, h, m, n}, true) == 0; if (select_nfltmax_at_0) { - if (is_zero) + if (is_zero) { attn_score[n] = -FLT_MAX; + } } else { if (!is_zero) { attn_score[n] = -FLT_MAX; @@ -257,8 +264,9 @@ struct MHAKernel { dnnl::memory::dims make_dnnl_dims(const std::vector& dims) { dnnl::memory::dims dnnl_dims(dims.size()); - for (size_t i = 0; i < dims.size(); i++) + for (size_t i = 0; i < dims.size(); i++) { dnnl_dims[i] = static_cast(dims[i]); + } return dnnl_dims; } @@ -296,10 +304,11 @@ struct MHAKernel { } qk_gemm_ptr = qk_result.first; - if (has_out_transpose) + if (has_out_transpose) { out_md = dnnl::memory::desc(make_dnnl_dims({B, q_len, H, head_size_v}), qkv_dt, tag::abcd); - else + } else { out_md = 
dnnl::memory::desc(make_dnnl_dims({B, H, q_len, head_size_v}), qkv_dt, tag::abcd); + } size_t ldc_index = 2; if (has_out_transpose) { @@ -368,8 +377,9 @@ struct MHAKernel { T* k_ptr = &present_key.at({b, h, 0, 0}); T* v_ptr = &present_value.at({b, h, 0, 0}); qk_gemm_ptr->copy_buffer_b(k_ptr, &qk_scratch_b.at({b, h, 0})); - if (is_xf16) + if (is_xf16) { wv_gemm_ptr->copy_buffer_b(v_ptr, &wv_scratch_b.at({b, h, 0})); + } }); // attention @@ -391,23 +401,26 @@ struct MHAKernel { auto alibi_stride = 0; if (alibi_mask) { alibi_ptr = &alibi_mask.at({b, h, 0, 0}, true); - if (alibi_mask.size(2) > 1) + if (alibi_mask.size(2) > 1) { alibi_stride = alibi_mask.stride(2); + } } uint8_t* attn_mask_ptr = nullptr; auto attn_mask_stride = 0; if (attention_mask) { attn_mask_ptr = reinterpret_cast(&attention_mask.at({b, h, 0, 0}, true)); - if (attention_mask.size(2) > 1) + if (attention_mask.size(2) > 1) { attn_mask_stride = attention_mask.stride(2) * sizeof(T); + } } uint8_t* cmask_ptr = nullptr; auto cmask_stride = 0; if (causal_mask) { cmask_ptr = &causal_mask.at({b, h, 0, 0}, true); - if (causal_mask.size(2) > 1) + if (causal_mask.size(2) > 1) { cmask_stride = causal_mask.stride(2); + } } for (size_t m = m_start; m < m_end; m++) { // apply attention mask & sofmax @@ -497,8 +510,9 @@ struct MHAKernel { bool auto_causal, float d_scale = 0.0f) { auto head_size = query.size(3); - if (d_scale == 0.0f) + if (d_scale == 0.0f) { d_scale = 1.0f / sqrt(head_size); + } prepare_brgemm_prim(strm, query, present_key, present_value, has_out_transpose); execute_brgemm(query, @@ -709,16 +723,18 @@ struct MHAKernel { auto h_group_num = present_key.size(1); size_t h_each_group_len = H / h_group_num; - if (d_scale == 0.0f) + if (d_scale == 0.0f) { d_scale = 1.0f / sqrt(head_size); + } auto k_stride_s = present_key.stride(3); auto m_blocks = (q_len + m_block_size - 1) / m_block_size; auto bhb_loop = [&](size_t b, size_t h, size_t m_blk) { auto thread_id = parallel_get_thread_num(); - if (thread_id < 
0) + if (thread_id < 0) { OPENVINO_THROW("The calling thread isn't initialized!"); + } auto& qk_buf = qk_buffers[thread_id]; auto m_start = m_blk * m_block_size; @@ -735,28 +751,31 @@ struct MHAKernel { auto alibi_stride = 0; if (alibi_mask) { alibi_ptr = &alibi_mask.at({b, h, 0, 0}, true); - if (alibi_mask.size(2) > 1) + if (alibi_mask.size(2) > 1) { alibi_stride = alibi_mask.stride(2); + } } uint8_t* attn_mask_ptr = nullptr; auto attn_mask_stride = 0; if (attention_mask) { attn_mask_ptr = reinterpret_cast(&attention_mask.at({b, h, 0, 0}, true)); - if (attention_mask.size(2) > 1) + if (attention_mask.size(2) > 1) { attn_mask_stride = attention_mask.stride(2) * sizeof(float); + } } uint8_t* cmask_ptr = nullptr; auto cmask_stride = 0; if (causal_mask) { cmask_ptr = &causal_mask.at({b, h, 0, 0}, true); - if (causal_mask.size(2) > 1) + if (causal_mask.size(2) > 1) { cmask_stride = causal_mask.stride(2); + } } float* qk = &(qk_buf.at({0, 0})); auto qk_m_stride = qk_buf.stride(0); - if (k_stride_s == 1) + if (k_stride_s == 1) { mlas_sgemm("N", "T", m_cnt, @@ -771,7 +790,7 @@ struct MHAKernel { qk, qk_m_stride, 1); - else + } else { mlas_sgemm("N", "N", m_cnt, @@ -786,6 +805,7 @@ struct MHAKernel { qk, qk_m_stride, 1); + } for (size_t m = m_start; m < m_end; m++) { // apply attention mask & sofmax @@ -893,8 +913,9 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt void prepare_attn_mask(const MemoryPtr& attn_input) { attn_buf.resize(attn_input->getStaticDims()); auto p = attn_input->getDataAs(); - for (size_t i = 0; i < attn_input->getSize(); i++) + for (size_t i = 0; i < attn_input->getSize(); i++) { attn_buf.ptr()[i] = p[i] ? 
0.0f : -FLT_MAX; + } } void execute(const dnnl::stream& strm, @@ -944,8 +965,9 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt present_value = present_value.reshape(kv_shape); } - if (beam_input) + if (beam_input) { beam_table.reset(beam_input); + } if (input_num > 3) { // attn_mask if (inputs[3]->getDesc().getPrecision() == ov::element::u8) { @@ -986,8 +1008,9 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt } present_key.assert_dims({B, Hk, L0 + L1, S}); present_value.assert_dims({B, Hk, L0 + L1, SV}); - if (beam_table) + if (beam_table) { beam_table.assert_dims({B, L0 + L1}); + } bool auto_causal; bool use_attn_mask; @@ -1005,11 +1028,12 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt if (input_num > 3 && attn_mask.m_rank > 1) { assert(attn_mask); // spec requires at least 3, but torch sl test does use rank 2 - if (attn_mask.m_rank == 2) + if (attn_mask.m_rank == 2) { attn_mask = attn_mask.reshape({1, 1, attn_mask.m_dims[0], attn_mask.m_dims[1]}); - else if (attn_mask.m_rank == 3) + } else if (attn_mask.m_rank == 3) { attn_mask = attn_mask.reshape({1, attn_mask.m_dims[0], attn_mask.m_dims[1], attn_mask.m_dims[2]}); + } auto_causal = false; use_attn_mask = true; } else { @@ -1093,8 +1117,9 @@ ScaledDotProductAttention::ScaledDotProductAttention(const std::shared_ptr& op, const GraphCon } void ScatterUpdate::getSupportedDescriptors() { - if ((getParentEdges().size() != 3) && (getParentEdges().size() != 4)) + if ((getParentEdges().size() != 3) && (getParentEdges().size() != 4)) { THROW_CPU_NODE_ERR("has incorrect number of input edges"); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges"); + } } void ScatterUpdate::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& srcDataDim = 
getInputShapeAtPort(DATA_ID).getDims(); const auto& indicesDim = getInputShapeAtPort(INDICES_ID).getDims(); @@ -302,8 +305,9 @@ void ScatterUpdate::initSupportedPrimitiveDescriptors() { std::vector inPortConfig{{LayoutType::ncsp, dataPrec, false, canBeInplace ? 0 : -1}, {LayoutType::ncsp, indicesPrec}, {LayoutType::ncsp, dataPrec}}; - if (axisRelaxed) + if (axisRelaxed) { inPortConfig.emplace_back(LayoutType::ncsp, axisPrec); + } addSupportedPrimDesc(inPortConfig, {{LayoutType::ncsp, dataPrec, false, canBeInplace ? 0 : -1}}, impl_desc_type::unknown); @@ -555,8 +559,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, const auto& indices_shape = mem_indices->getStaticDims(); const size_t updates_rank = indices_shape.size(); - if (axis < 0) + if (axis < 0) { axis += updates_rank; + } OPENVINO_ASSERT(axis >= 0 && axis < static_cast(updates_rank), "Invalid axis."); const int64_t data_dim_size = static_cast(data_shape[axis]); @@ -585,8 +590,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, auto indices_offset = offsets[1]; for (size_t idx = 0; idx < index_dim_size; idx++) { int64_t idxValue = getIndicesValue(indicesPtr, indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); dataPtr[offsets[0] + idxValue * dataBlock_axisplus1] = value; indices_offset += indicesBlock_axisplus1; @@ -606,8 +612,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, auto indices_offset = offsets[1]; for (size_t idx = 0; idx < index_dim_size; idx++) { int64_t idxValue = getIndicesValue(indicesPtr, indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); auto dst = &dataPtr[offsets[0] + idxValue * dataBlock_axisplus1]; auto src = &updatePtr[indices_offset]; @@ -628,8 +635,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, size_t* 
ptr_indices_offset = &indices_offsets[0]; for (size_t worker = start; worker < end; worker++) { // idx = 0 int64_t idxValue = getIndicesValue(indicesPtr, *ptr_indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); auto dst = &dataPtr[ptr_dst_offset[0] + idxValue * dataBlock_axisplus1]; auto src = &updatePtr[ptr_indices_offset[0]]; @@ -646,8 +654,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, for (size_t worker = start; worker < end; worker++) { auto indices_offset = *ptr_indices_offset + idx * indicesBlock_axisplus1; int64_t idxValue = getIndicesValue(indicesPtr, indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); auto dst = &dataPtr[ptr_dst_offset[0] + idxValue * dataBlock_axisplus1]; auto src = &updatePtr[indices_offset]; @@ -678,8 +687,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, const auto& indices_shape = mem_indices->getStaticDims(); size_t updates_rank = indices_shape.size(); - if (axis < 0) + if (axis < 0) { axis += updates_rank; + } OPENVINO_ASSERT(axis >= 0 && axis < static_cast(updates_rank), "Invalid axis."); const int64_t data_dim_size = static_cast(data_shape[axis]); @@ -708,8 +718,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, auto indices_offset = offsets[1]; for (size_t idx = 0; idx < index_dim_size; idx++) { int64_t idxValue = getIndicesValue(indicesPtr, indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); dataPtr[offsets[0] + idxValue * dataBlock_axisplus1] = value; indices_offset += indicesBlock_axisplus1; @@ -731,8 +742,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, auto indices_offset = offsets[1]; for (size_t idx = 0; idx < index_dim_size; idx++) { int64_t idxValue = 
getIndicesValue(indicesPtr, indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); auto dst = &dataPtr[offsets[0] + idxValue * dataBlock_axisplus1]; auto src = &updatePtr[indices_offset]; @@ -765,8 +777,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, size_t* ptr_indices_offset = &indices_offsets[0]; for (size_t worker = start; worker < end; worker++) { // idx = 0 int64_t idxValue = getIndicesValue(indicesPtr, *ptr_indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); auto dst = &dataPtr[ptr_dst_offset[0] + idxValue * dataBlock_axisplus1]; auto src = &updatePtr[ptr_indices_offset[0]]; @@ -785,8 +798,9 @@ void ScatterUpdate::scatterElementsUpdate(const MemoryPtr& mem_data, for (size_t worker = start; worker < end; worker++) { auto indices_offset = *ptr_indices_offset + idx * indicesBlock_axisplus1; int64_t idxValue = getIndicesValue(indicesPtr, indices_offset); - if (idxValue < 0) + if (idxValue < 0) { idxValue += data_dim_size; + } assert(idxValue < data_dim_size && idxValue >= 0); auto dst = &dataPtr[ptr_dst_offset[0] + idxValue * dataBlock_axisplus1]; auto src = &updatePtr[indices_offset]; @@ -911,11 +925,12 @@ void ScatterUpdate::execute(const dnnl::stream& strm) { } } } - if (updateRank > expectUpdateShape.size()) + if (updateRank > expectUpdateShape.size()) { THROW_CPU_NODE_ERR("cannot update shape. 
New rank: ", updateRank, ", expected: ", expectUpdateShape.size()); + } for (size_t ru = 0; ru < updateRank; ru++) { if (updateDim[ru] != expectUpdateShape[ru]) { THROW_CPU_NODE_ERR("do not have matched tensor shape relationship for input, indices and update"); diff --git a/src/plugins/intel_cpu/src/nodes/search_sorted.cpp b/src/plugins/intel_cpu/src/nodes/search_sorted.cpp index 2254dc46391073..6e7c8337cfafc5 100644 --- a/src/plugins/intel_cpu/src/nodes/search_sorted.cpp +++ b/src/plugins/intel_cpu/src/nodes/search_sorted.cpp @@ -37,8 +37,9 @@ void SearchSorted::getSupportedDescriptors() { } void SearchSorted::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type inputPrec = getOriginalInputPrecisionAtPort(0); ov::element::Type outputPrec = getOriginalOutputPrecisionAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.cpp b/src/plugins/intel_cpu/src/nodes/shapeof.cpp index abd55142b098c2..074a0e6c667f32 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.cpp +++ b/src/plugins/intel_cpu/src/nodes/shapeof.cpp @@ -29,23 +29,27 @@ ShapeOf::ShapeOf(const std::shared_ptr& op, const GraphContext::CPtr& : Node(op, context, ShapeOfShapeInferFactory()) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { - if (op->get_input_partial_shape(0).size() == 0) + if (op->get_input_partial_shape(0).size() == 0) { THROW_CPU_NODE_ERR("gets unsupported input 0D tensor (scalar)"); + } } else { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } } void ShapeOf::getSupportedDescriptors() { - if (getParentEdges().size() != 1) + if (getParentEdges().size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getParentEdges().size()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getChildEdges().size()); + } } void ShapeOf::initSupportedPrimitiveDescriptors() { 
- if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type precision = getOriginalInputPrecisionAtPort(0); @@ -87,8 +91,9 @@ void ShapeOf::execute(const dnnl::stream& strm) { auto outPtr = getDstMemoryAtPort(0); auto&& inDims = inPtr->getStaticDims(); size_t dimsCount = inDims.size(); - if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0]) + if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0]) { THROW_CPU_NODE_ERR("has inconsistent input shape and output size"); + } auto* dst = outPtr->getDataAs(); diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index f73b920345c46a..0b9e1999dfa303 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -71,25 +71,29 @@ ShuffleChannels::ShuffleChannels(const std::shared_ptr& op, const Grap OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 1 || outputShapes.size() != 1) + if (inputShapes.size() != 1 || outputShapes.size() != 1) { THROW_SHCH_ERROR("has incorrect number of input/output edges."); + } auto shuffleChannels = ov::as_type_ptr(op); attrs.group = shuffleChannels->get_group(); attrs.axis = shuffleChannels->get_axis(); attrs.dataRank = getInputShapeAtPort(0).getRank(); - if (attrs.axis < 0) + if (attrs.axis < 0) { attrs.axis += attrs.dataRank; + } } void ShuffleChannels::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type precision = getOriginalInputPrecisionAtPort(0); const std::set supported_precision_sizes = {1, 2, 4, 8, 16}; - if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) + if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) { 
THROW_SHCH_ERROR("has unsupported precision: ", precision.get_type_name()); + } impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_core)) { @@ -118,12 +122,15 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() { void ShuffleChannels::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr) + if (!dstMemPtr) { THROW_SHCH_ERROR("has null destination memory"); - if (!srcMemPtr) + } + if (!srcMemPtr) { THROW_SHCH_ERROR("has null input memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_SHCH_ERROR("has unidentified preferable primitive descriptor"); + } const auto& memoryDesc = srcMemPtr->getDesc(); attrs.spatialRank = attrs.dataRank - attrs.axis - 1; @@ -134,8 +141,9 @@ void ShuffleChannels::createPrimitive() { : LayoutType::ncsp; if (inputShapesDefined() && isExecutable()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -158,8 +166,9 @@ void ShuffleChannels::prepareParams() { } ShuffleChannels::ShuffleChannelsExecutor::ShuffleChannelsExecutor(const ShuffleChannelsAttributes& attrs) { - if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) + if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) { OPENVINO_THROW("ShuffleChannels executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts."); + } const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); const bool isChannelsLast = attrs.layoutType == LayoutType::nspc; @@ -261,20 +270,23 @@ ShuffleChannels::ShuffleChannelsExecutor::ShuffleChannelsExecutor(const ShuffleC std::iota(params.src_block_order.begin(), params.src_block_order.end(), 0); std::iota(params.dst_block_order.begin(), params.dst_block_order.end(), 0); - for (int i = 0; i < reshapedRank; i++) + for (int i 
= 0; i < reshapedRank; i++) { params.dst_block_dims[i] = params.src_block_dims[params.order[i]]; + } permuteKernel = std::unique_ptr(new PermuteKernel(params)); } void ShuffleChannels::ShuffleChannelsExecutor::exec(const uint8_t* srcData, uint8_t* dstData, const int MB) { - if (!permuteKernel) + if (!permuteKernel) { OPENVINO_THROW("Could not execute. Kernel for Transpose node was not compiled."); + } - if (MB > 0) + if (MB > 0) { permuteKernel->execute(srcData, dstData, MB); - else + } else { permuteKernel->execute(srcData, dstData); + } } void ShuffleChannels::executeDynamicImpl(const dnnl::stream& strm) { @@ -282,8 +294,9 @@ void ShuffleChannels::executeDynamicImpl(const dnnl::stream& strm) { } void ShuffleChannels::execute(const dnnl::stream& strm) { - if (!execPtr) + if (!execPtr) { THROW_SHCH_ERROR("doesn't have a compiled executor."); + } int MB = (attrs.axis != 0) ? getSrcMemoryAtPort(0)->getStaticDims()[0] : -1; diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp index 7dee3ae7aad8b6..f95e7bf3ae5f21 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp @@ -78,18 +78,22 @@ SoftMax::SoftMax(const std::shared_ptr& op, const GraphContext::CPtr& } void SoftMax::getSupportedDescriptors() { - if (descs.size()) + if (descs.size()) { return; + } ov::element::Type precision = getOriginalInputPrecisionAtPort(0); - if (!one_of(precision, ov::element::f32, ov::element::bf16, ov::element::f16)) + if (!one_of(precision, ov::element::f32, ov::element::bf16, ov::element::f16)) { precision = ov::element::f32; + } auto inputDataType = DnnlExtensionUtils::ElementTypeToDataType(precision); - if (getParentEdges().size() != 1) + if (getParentEdges().size() != 1) { OPENVINO_THROW("Incorrect number of input edges for layer ", getName()); - if (!getChildEdges().size()) + } + if (!getChildEdges().size()) { OPENVINO_THROW("Incorrect number of output edges for layer ", getName()); + 
} const auto& inShape = getInputShapeAtPort(0); if (inShape.getRank() == 3) { @@ -100,8 +104,9 @@ void SoftMax::getSupportedDescriptors() { for (auto format : getAvailableFormatsForDims(inShape)) { auto in_candidate = std::make_shared(inShape, inputDataType, format); - if (in_candidate->blocksExtended()) + if (in_candidate->blocksExtended()) { continue; + } createDescriptor({in_candidate}, {}); } @@ -120,8 +125,9 @@ Node::AttrPtr SoftMax::initPrimitiveAttr() { void SoftMax::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set."); + } auto config = selected_pd->getConfig(); if (isDynamicNode()) { auto outMemDesc = config.outConfs[0].getMemDesc(); @@ -130,8 +136,9 @@ void SoftMax::initOptimalPrimitiveDescriptor() { } else { if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || (config.inConfs[0].getMemDesc()->isDefined() && config.outConfs[0].getMemDesc()->isDefined() && - !config.outConfs[0].getPortDesc()->isCompatible(*config.inConfs[0].getPortDesc()))) + !config.outConfs[0].getPortDesc()->isCompatible(*config.inConfs[0].getPortDesc()))) { OPENVINO_THROW("Layer ", getName(), " has incorrect selected config!"); + } config.inConfs[0].setMemDesc(getConsistentInputDesc(config, 0)->getMemDesc()); config.outConfs[0].setMemDesc(config.inConfs[0].getMemDesc()); @@ -156,16 +163,18 @@ void SoftMax::createDescriptor(const std::vector& inputDesc, *attr, true); - if (desc) + if (desc) { descs.emplace_back(desc); + } } void SoftMax::prepareParams() { auto inpDesc = getParentEdgeAt(0)->getMemory().getDescWithType(); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), "."); + } auto attr = initPrimitiveAttr(); diff --git 
a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 58d5879cca9e1a..273a754d5a58fe 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -32,26 +32,31 @@ SpaceToBatch::SpaceToBatch(const std::shared_ptr& op, const GraphConte OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 4 || outputShapes.size() != 1) + if (inputShapes.size() != 4 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input or output edges!"); + } const size_t srcRank = getInputShapeAtPort(0).getRank(); const size_t dstRank = getOutputShapeAtPort(0).getRank(); - if (srcRank < 4 || srcRank > 5) + if (srcRank < 4 || srcRank > 5) { THROW_CPU_NODE_ERR("has unsupported 'data' input rank: ", srcRank); - if (srcRank != dstRank) + } + if (srcRank != dstRank) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions"); + } } void SpaceToBatch::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& inDims = getInputShapeAtPort(0).getDims(); const auto precision = getOriginalInputPrecisionAtPort(0); const std::set supported_precision_sizes = {1, 2, 4, 8}; - if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) + if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) { THROW_CPU_NODE_ERR("has unsupported precision: ", precision.get_type_name()); + } addSupportedPrimDesc({{LayoutType::nspc, precision}, {LayoutType::ncsp, ov::element::i32}, diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index 0384dabc63d73c..d1c02ef7f1821a 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -73,12 +73,14 @@ 
SpaceToDepth::SpaceToDepth(const std::shared_ptr& op, const GraphConte if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (inputShapes.size() != 1 || outputShapes.size() != 1) + if (inputShapes.size() != 1 || outputShapes.size() != 1) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges!"); + } auto spaceToDepth = ov::as_type_ptr(op); - if (!spaceToDepth) + if (!spaceToDepth) { THROW_CPU_NODE_ERR("supports only opset1"); + } const auto modeNgraph = spaceToDepth->get_mode(); if (modeNgraph == ov::op::v0::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST) { @@ -90,17 +92,21 @@ SpaceToDepth::SpaceToDepth(const std::shared_ptr& op, const GraphConte } attrs.blockSize = spaceToDepth->get_block_size(); - if (attrs.blockSize == 0) + if (attrs.blockSize == 0) { THROW_CPU_NODE_ERR("has incorrect block_size parameter is zero!"); + } const size_t srcRank = getInputShapeAtPort(0).getRank(); const size_t dstRank = getOutputShapeAtPort(0).getRank(); - if (srcRank < 3) + if (srcRank < 3) { THROW_CPU_NODE_ERR("has incorrect number of input dimensions"); - if (srcRank > 5) + } + if (srcRank > 5) { THROW_CPU_NODE_ERR("doesn't support dimensions with rank greater than 5"); - if (srcRank != dstRank) + } + if (srcRank != dstRank) { THROW_CPU_NODE_ERR("has incorrect number of input/output dimensions"); + } attrs.nSpatialDims = srcRank - 2; attrs.blockStep = static_cast(std::pow(attrs.blockSize, attrs.nSpatialDims)); } @@ -108,8 +114,9 @@ SpaceToDepth::SpaceToDepth(const std::shared_ptr& op, const GraphConte void SpaceToDepth::getSupportedDescriptors() {} void SpaceToDepth::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type precision = getOriginalInputPrecisionAtPort(0); @@ -142,10 +149,12 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { }; supportedTypes.push_back(LayoutType::nspc); - if (canUseBlocked(8lu)) + if 
(canUseBlocked(8lu)) { supportedTypes.push_back(LayoutType::nCsp8c); - if (canUseBlocked(16lu)) + } + if (canUseBlocked(16lu)) { supportedTypes.push_back(LayoutType::nCsp16c); + } } supportedTypes.push_back(LayoutType::ncsp); auto creators = BlockedDescCreator::getCommonCreators(); @@ -161,12 +170,15 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { void SpaceToDepth::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr) + if (!dstMemPtr) { THROW_CPU_NODE_ERR("has null destination memory"); - if (!srcMemPtr) + } + if (!srcMemPtr) { THROW_CPU_NODE_ERR("has null input memory"); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor"); + } const auto& memoryDesc = srcMemPtr->getDesc(); attrs.dataSize = memoryDesc.getPrecision().size(); @@ -176,8 +188,9 @@ void SpaceToDepth::createPrimitive() { : LayoutType::ncsp; if (inputShapesDefined() && isExecutable()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } } @@ -199,9 +212,10 @@ void SpaceToDepth::prepareParams() { } SpaceToDepth::SpaceToDepthExecutor::SpaceToDepthExecutor(const SpaceToDepthAttrs& attrs) { - if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) + if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) { OPENVINO_THROW("SpaceToDepth executor supports only 'nCsp16c', 'nCsp8c', " "'nspc' or 'ncsp' layouts."); + } const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc; @@ -285,15 +299,17 @@ SpaceToDepth::SpaceToDepthExecutor::SpaceToDepthExecutor(const SpaceToDepthAttrs std::iota(params.src_block_order.begin(), params.src_block_order.end(), 0); 
std::iota(params.dst_block_order.begin(), params.dst_block_order.end(), 0); - for (size_t i = 0; i < reshapedRank; i++) + for (size_t i = 0; i < reshapedRank; i++) { params.dst_block_dims[i] = params.src_block_dims[params.order[i]]; + } permuteKernel = std::unique_ptr(new PermuteKernel(params)); } void SpaceToDepth::SpaceToDepthExecutor::exec(const uint8_t* srcData, uint8_t* dstData, const int MB) { - if (!permuteKernel) + if (!permuteKernel) { OPENVINO_THROW("Could not execute. Kernel for Transpose node was not compiled."); + } permuteKernel->execute(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index af8295cbe98a9e..2bfbd291d5d42b 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -82,8 +82,9 @@ void Split::getSupportedDescriptors() {} void Split::initSupportedPrimitiveDescriptors() { constexpr size_t channelsPos = 1lu; - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const auto& srcShape = getInputShapeAtPort(0); const auto& dstFirstDims = getOutputShapeAtPort(0).getDims(); @@ -94,10 +95,12 @@ void Split::initSupportedPrimitiveDescriptors() { } for (size_t j = 0; j < dstFirstDims.size(); j++) { - if (j == axis) + if (j == axis) { continue; - if (!dimsEqualWeak(o_Dims[j], dstFirstDims[j])) + } + if (!dimsEqualWeak(o_Dims[j], dstFirstDims[j])) { THROW_CPU_NODE_ERR("has incorrect output dimensions"); + } } } @@ -111,8 +114,9 @@ void Split::initSupportedPrimitiveDescriptors() { if (srcShape.getRank() > 2) { for (auto item : {std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { const auto& blkDims = srcShape.getDims(); - if (blkDims[channelsPos] == Shape::UNDEFINED_DIM || blkDims[channelsPos] % item.first != 0) + if (blkDims[channelsPos] == Shape::UNDEFINED_DIM || blkDims[channelsPos] % item.first != 0) { continue; + } bool blocked = true; for 
(size_t i = 0; i < outputShapes.size(); i++) { @@ -298,8 +302,9 @@ void Split::execute(const dnnl::stream& strm) { return; } - if (dstMemPtrs.empty()) + if (dstMemPtrs.empty()) { THROW_CPU_NODE_ERR("Output data pointers have not been initialized."); + } const auto& srcMem = getParentEdgeAt(0)->getMemory(); @@ -320,8 +325,9 @@ bool Split::created() const { void Split::initOptimalPrimitiveDescriptor() { Node::initOptimalPrimitiveDescriptor(); auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { THROW_CPU_NODE_ERR("Preferable primitive descriptor is not set."); + } auto config = selected_pd->getConfig(); canUseOptimizedNspc2Ncsp = false; @@ -330,8 +336,9 @@ void Split::initOptimalPrimitiveDescriptor() { if (axis == 1 && one_of(inConfDesc->getShape().getRank(), 4u, 5u) && inConfDesc->hasLayoutType(LayoutType::nspc)) { canUseOptimizedNspc2Ncsp = true; for (size_t i = 0; i < config.outConfs.size(); i++) { - if (!config.outConfs[i].getMemDesc()->hasLayoutType(LayoutType::ncsp)) + if (!config.outConfs[i].getMemDesc()->hasLayoutType(LayoutType::ncsp)) { canUseOptimizedNspc2Ncsp = false; + } } } } @@ -514,16 +521,18 @@ Split::SplitOptimizedExecutor::SplitOptimizedExecutor(const BlockedMemoryDescCPt const auto getRank = srcDims.size(); countStrides = 1; - for (unsigned int i = 0; i < axisOrderPos; i++) + for (unsigned int i = 0; i < axisOrderPos; i++) { countStrides *= srcDims[i]; + } srcDataStride = 0; dataSize.resize(outputPortsCount); for (size_t i = 0; i < outputPortsCount; i++) { dataSize[i] = srcDataSize; - for (size_t j = axisOrderPos; j < getRank; j++) + for (size_t j = axisOrderPos; j < getRank; j++) { dataSize[i] *= outDescs[i]->getBlockDims()[j]; + } srcDataStride += dataSize[i]; } @@ -551,8 +560,9 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { return; } auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) + if (selected_pd == nullptr) { OPENVINO_THROW("Preferable 
primitive descriptor is not set."); + } auto& config = selected_pd->getConfig(); size_t numberOfOutputs = config.outConfs.size(); size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace(); diff --git a/src/plugins/intel_cpu/src/nodes/stft.cpp b/src/plugins/intel_cpu/src/nodes/stft.cpp index 21a34585c45dda..751891e9c4af7f 100644 --- a/src/plugins/intel_cpu/src/nodes/stft.cpp +++ b/src/plugins/intel_cpu/src/nodes/stft.cpp @@ -53,8 +53,9 @@ void STFT::getSupportedDescriptors() { } void STFT::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } auto dataPrecision = getOriginalInputPrecisionAtPort(DATA_IDX); if (!one_of(dataPrecision, ov::element::f32)) { diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp index f2b1a90e7b4c60..15c99b91824bb6 100644 --- a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp @@ -80,8 +80,9 @@ StridedSlice::StridedSlice(const std::shared_ptr& op, const GraphConte } } hasConstAttrInputs = !shapeHasDataDependency; - if (isAxesSpecified) + if (isAxesSpecified) { hasConstAttrInputs &= isConstantInput[attrs.AXES_ID]; + } const size_t inputRank = getInputShapeAtPort(attrs.DATA_ID).getRank(); const size_t outputRank = getOutputShapeAtPort(0).getRank(); @@ -98,8 +99,9 @@ StridedSlice::StridedSlice(const std::shared_ptr& op, const GraphConte mask[i] = 1 - mask[i]; } } - for (size_t i = mask.size(); i < nDims; ++i) + for (size_t i = mask.size(); i < nDims; ++i) { mask.push_back(bit); + } return mask; }; @@ -127,8 +129,9 @@ StridedSlice::StridedSlice(const std::shared_ptr& op, const GraphConte attrs.ellipsisMaskCounter += attrs.ellipsisMask[i]; attrs.ellipsisPos1 = attrs.ellipsisMask[i] == 1 && attrs.ellipsisPos1 == -1 ? 
i : attrs.ellipsisPos1; } - if (attrs.ellipsisMaskCounter > 1) + if (attrs.ellipsisMaskCounter > 1) { THROW_CPU_NODE_ERR("has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed"); + } int newAxis = std::accumulate(attrs.newAxisMask.begin(), attrs.newAxisMask.end(), 0); int shrinkAxis = std::accumulate(attrs.shrinkAxisMask.begin(), attrs.shrinkAxisMask.end(), 0); @@ -138,25 +141,29 @@ StridedSlice::StridedSlice(const std::shared_ptr& op, const GraphConte } auto fillingInParameters = [&](std::vector& parameter, const size_t type, const int value) { - if (!isConstantInput[type]) + if (!isConstantInput[type]) { return; + } const auto constNode = ov::as_type_ptr(op->get_input_node_shared_ptr(type)); parameter = constNode->cast_vector(); auto size = constNode->get_shape()[0]; if (type != attrs.AXES_ID && attrs.ellipsisMaskCounter == 0 && size < nDims) { - for (size_t i = size; i < nDims; i++) + for (size_t i = size; i < nDims; i++) { parameter.push_back(value); + } } }; fillingInParameters(attrs.begin, attrs.BEGIN_ID, 0); fillingInParameters(attrs.end, attrs.END_ID, 0); - if (inputShapes.size() > attrs.STRIDE_ID) + if (inputShapes.size() > attrs.STRIDE_ID) { fillingInParameters(attrs.stride, attrs.STRIDE_ID, 1); - if (inputShapes.size() > attrs.AXES_ID) + } + if (inputShapes.size() > attrs.AXES_ID) { fillingInParameters(attrs.axes, attrs.AXES_ID, 0); + } } void StridedSlice::getSupportedDescriptors() {} @@ -171,8 +178,9 @@ static void addHiddenDims(StridedSlice::StridedSliceAttributes& attrs, std::vector strideTmp(outputRank, 1); size_t i = 0lu; for (auto& a : attrs.axes) { - if (a < 0) + if (a < 0) { a += outputRank; + } beginTmp[a] = attrs.begin[i]; endTmp[a] = attrs.end[i]; strideTmp[a] = attrs.stride[i++]; @@ -194,12 +202,15 @@ static void addHiddenDims(StridedSlice::StridedSliceAttributes& attrs, auto addHiddenDims = [&](std::vector& data, const int bit = 0) { std::vector temp; temp.reserve(attrs.ellipsisPos1); - for (int i = 0; i < attrs.ellipsisPos1; i++) 
+ for (int i = 0; i < attrs.ellipsisPos1; i++) { temp.push_back(data[i]); - for (size_t i = attrs.ellipsisPos1; i < ellipsisPos2 + 1; i++) + } + for (size_t i = attrs.ellipsisPos1; i < ellipsisPos2 + 1; i++) { temp.push_back(bit); - for (size_t i = 1; i < inputRank - ellipsisPos2; i++) + } + for (size_t i = 1; i < inputRank - ellipsisPos2; i++) { temp.push_back(data[i + attrs.ellipsisPos1]); + } data = temp; }; @@ -215,8 +226,9 @@ static void addHiddenDims(StridedSlice::StridedSliceAttributes& attrs, } void StridedSlice::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const ov::element::Type dataPrecision = getOriginalInputPrecisionAtPort(attrs.DATA_ID); const ov::element::Type iPrecision = ov::element::i32; @@ -249,18 +261,22 @@ void StridedSlice::initSupportedPrimitiveDescriptors() { std::vector supportedTypes; if (nDims > 2 && attrs.equalDims) { auto canUseBlocked = [&](StridedSliceAttributes& tmpAttrs, const size_t blockSize) { - if (attrs.isSliceScatterOp) + if (attrs.isSliceScatterOp) { return false; - if (!isConstantInput[attrs.BEGIN_ID]) + } + if (!isConstantInput[attrs.BEGIN_ID]) { return false; + } const auto& srcDims = getInputShapeAtPort(attrs.DATA_ID).getDims(); - if (srcDims[1] == Shape::UNDEFINED_DIM) + if (srcDims[1] == Shape::UNDEFINED_DIM) { return false; + } auto channelBeginNormalized = tmpAttrs.begin[1] > 0 ? 
tmpAttrs.begin[1] : tmpAttrs.begin[1] + static_cast(srcDims[1]); return srcDims[1] % blockSize == 0 && abs(tmpAttrs.stride[1]) == 1 && - (channelBeginNormalized > static_cast(srcDims[1]) || channelBeginNormalized % blockSize == 0 || - channelBeginNormalized < 0 || tmpAttrs.beginMask[1] == 0); + (channelBeginNormalized > static_cast(srcDims[1]) || + channelBeginNormalized % blockSize == 0 || channelBeginNormalized < 0 || + tmpAttrs.beginMask[1] == 0); }; supportedTypes.push_back(LayoutType::nspc); @@ -271,10 +287,12 @@ void StridedSlice::initSupportedPrimitiveDescriptors() { getInputShapeAtPort(attrs.DATA_ID).getRank(), getOutputShapeAtPort(0).getRank(), isAxesSpecified); - if (canUseBlocked(tmpAttrs, 8lu)) + if (canUseBlocked(tmpAttrs, 8lu)) { supportedTypes.push_back(LayoutType::nCsp8c); - if (canUseBlocked(tmpAttrs, 16lu)) + } + if (canUseBlocked(tmpAttrs, 16lu)) { supportedTypes.push_back(LayoutType::nCsp16c); + } } } supportedTypes.push_back(LayoutType::ncsp); @@ -288,12 +306,14 @@ void StridedSlice::initSupportedPrimitiveDescriptors() { creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(attrs.BEGIN_ID))); config.inConfs[attrs.END_ID].setMemDesc( creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(attrs.END_ID))); - if (isStrideSpecified) + if (isStrideSpecified) { config.inConfs[attrs.STRIDE_ID].setMemDesc( creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(attrs.STRIDE_ID))); - if (isAxesSpecified) + } + if (isAxesSpecified) { config.inConfs[attrs.AXES_ID].setMemDesc( creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(attrs.AXES_ID))); + } if (attrs.isSliceScatterOp) { config.inConfs[attrs.UPDATES_ID].setMemDesc( itr->second->createSharedDesc(dataPrecision, getInputShapeAtPort(attrs.UPDATES_ID))); @@ -343,8 +363,9 @@ bool StridedSlice::needShapeInfer() const { } void StridedSlice::execute(const dnnl::stream& strm) { - if (!execPtr) + if 
(!execPtr) { THROW_CPU_NODE_ERR("doesn't have compiled executor!"); + } execPtr->exec(srcMemory, dstMemory); } @@ -395,8 +416,9 @@ void StridedSlice::StridedSliceCommonExecutor::orderParametersByLayouts( } else if (isPerChannelLayout) { auto sortByOrder = [&](std::vector& data) { std::vector temp(srcOrder.size()); - for (size_t i = 0; i < srcOrder.size(); i++) + for (size_t i = 0; i < srcOrder.size(); i++) { temp[i] = data[srcOrder[i]]; + } data = temp; }; @@ -433,53 +455,66 @@ void StridedSlice::StridedSliceCommonExecutor::paramsInitialization(const Stride parameter.assign(ptr, ptr + size); if (type != attrs.AXES_ID && params.attrs.ellipsisMaskCounter == 0 && size < nDims) { - for (size_t i = size; i < nDims; i++) + for (size_t i = size; i < nDims; i++) { parameter.push_back(value); + } } }; params.attrs.beginDims = srcMemory[attrs.BEGIN_ID]->getShape().getStaticDims(); params.attrs.endDims = srcMemory[attrs.END_ID]->getShape().getStaticDims(); - if (params.attrs.beginDims.size() != 1) + if (params.attrs.beginDims.size() != 1) { OPENVINO_THROW("Strided slice common executor should have begin vector with 1 dimension"); - if (params.attrs.endDims.size() != 1) + } + if (params.attrs.endDims.size() != 1) { OPENVINO_THROW("Strided slice common executor should have end vector with 1 dimension"); - if (params.attrs.beginDims[0] != params.attrs.endDims[0]) + } + if (params.attrs.beginDims[0] != params.attrs.endDims[0]) { OPENVINO_THROW("Strided slice common executor should have begin vector with size equal to end vector size"); + } - if (params.attrs.begin.empty()) + if (params.attrs.begin.empty()) { fillingInParameters(params.attrs.begin, attrs.BEGIN_ID, params.attrs.beginDims[0], 0); - if (params.attrs.end.empty()) + } + if (params.attrs.end.empty()) { fillingInParameters(params.attrs.end, attrs.END_ID, params.attrs.endDims[0], 0); + } if (srcMemory.size() > attrs.STRIDE_ID) { params.attrs.strideDims = srcMemory[attrs.STRIDE_ID]->getShape().getStaticDims(); - if 
(params.attrs.strideDims.size() > 1) + if (params.attrs.strideDims.size() > 1) { OPENVINO_THROW("Strided slice common executor should have stride vector with 1 dimension"); - if (params.attrs.beginDims[0] != params.attrs.strideDims[0]) + } + if (params.attrs.beginDims[0] != params.attrs.strideDims[0]) { OPENVINO_THROW( "Strided slice common executor should have stride vector with size equal to begin vector size"); + } - if (params.attrs.stride.empty()) + if (params.attrs.stride.empty()) { fillingInParameters(params.attrs.stride, attrs.STRIDE_ID, params.attrs.strideDims[0], 1); + } } if (srcMemory.size() > attrs.AXES_ID) { params.attrs.axesDims = srcMemory[attrs.AXES_ID]->getShape().getStaticDims(); - if (params.attrs.axesDims.size() != 1) + if (params.attrs.axesDims.size() != 1) { OPENVINO_THROW("Strided slice common executor should have axes vector with 1 dimension."); - if (params.attrs.beginDims[0] != params.attrs.axesDims[0]) + } + if (params.attrs.beginDims[0] != params.attrs.axesDims[0]) { OPENVINO_THROW( "Strided slice common executor should have axes vector with size equal to begin vector size."); + } - if (params.attrs.axes.empty()) + if (params.attrs.axes.empty()) { fillingInParameters(params.attrs.axes, attrs.AXES_ID, params.attrs.axesDims[0], 0); + } } addHiddenDims(params.attrs, inputRank, outputRank, srcMemory.size() > attrs.AXES_ID); - if (!srcBlockedMemoryDesc->hasLayoutType(LayoutType::ncsp)) + if (!srcBlockedMemoryDesc->hasLayoutType(LayoutType::ncsp)) { orderParametersByLayouts(srcBlockedMemoryDesc); + } } void StridedSlice::StridedSliceCommonExecutor::dimsNormalization() { @@ -520,12 +555,14 @@ void StridedSlice::StridedSliceCommonExecutor::dimsNormalization() { int nNewAxisAfterEllipses = 0; int nSrcAxisBeforeEllipses = 0; for (size_t i = 0; i < axis; ++i) { - if (params.attrs.newAxisMask[i] != 1) + if (params.attrs.newAxisMask[i] != 1) { nSrcAxisBeforeEllipses++; + } } for (size_t i = axis + 1; i < params.attrs.begin.size(); ++i) { - if 
(params.attrs.newAxisMask[i] == 1) + if (params.attrs.newAxisMask[i] == 1) { nNewAxisAfterEllipses++; + } } size_t nSrcAxisAfterEllipses = (params.attrs.begin.size() - axis - nNewAxisAfterEllipses - 1); @@ -614,10 +651,11 @@ void StridedSlice::StridedSliceCommonExecutor::dimsGluing() { indexes.push_back(0u == idx ? 0 : idx - 1); indexes.push_back(params.attrs.stride[idx] == 1 ? idx : idx + 1); - if (idx != 0 && secondDim.first == 0) + if (idx != 0 && secondDim.first == 0) { secondDim.first = idx; - else if (idx != 0 && secondDim.second == params.attrs.begin.size()) + } else if (idx != 0 && secondDim.second == params.attrs.begin.size()) { secondDim.second = idx; + } } } @@ -669,8 +707,9 @@ void StridedSlice::StridedSliceCommonExecutor::dimsGluing() { params.dstStrides.insert(params.dstStrides.begin() + 1, params.dstStrides[0] / realDstDim); params.srcStrides.insert(params.srcStrides.begin() + 1, params.srcStrides[0] / realSrcDim); - for (size_t idx = secondDim.first + 1; idx < secondDim.second; idx++) + for (size_t idx = secondDim.first + 1; idx < secondDim.second; idx++) { params.attrs.begin[1] /= dstBlockedDimsBefore[idx]; + } if (params.dstBlockedDims[0] < m_threads_num) { params.dstBlockedDims[1] /= realDstDim; @@ -679,15 +718,17 @@ void StridedSlice::StridedSliceCommonExecutor::dimsGluing() { params.srcBlockedDims.insert(params.srcBlockedDims.begin() + 1, realSrcDim); } - if (params.dstBlockedDims.size() > 2) + if (params.dstBlockedDims.size() > 2) { lastDstDim /= dstBlockedDimsBefore[secondDim.first]; + } } // some parameter calculations for common execution params.isOptimized = params.nDimsForWork == 1 && params.dstBlockedDims.size() > 1; if (params.isOptimized) { - if (params.dstBlockedDims.size() == 2) + if (params.dstBlockedDims.size() == 2) { params.dstBlockedDims[1] = 1; + } workAmount = params.dstBlockedDims[0] * params.dstBlockedDims[1]; srcShift = (params.attrs.begin[0] * params.srcStrides[0] + params.attrs.begin[1] * params.srcStrides[1]) * @@ -723,8 
+764,9 @@ void StridedSlice::StridedSliceCommonExecutor::indicesCalculation() { auto getSrcIdx = [&](const VectorDims& indexes) { size_t srcIdx = 0; - for (size_t i = 0; i < params.nDimsForWork; ++i) + for (size_t i = 0; i < params.nDimsForWork; ++i) { srcIdx += (params.attrs.begin[i] + indexes[i] * params.attrs.stride[i]) * params.srcStrides[i]; + } return srcIdx * params.attrs.dataSize; }; @@ -751,8 +793,9 @@ void StridedSlice::StridedSliceCommonExecutor::indicesCalculation() { out = true; } - if (out) + if (out) { srcIdx = getSrcIdx(coords); + } } }); } @@ -785,8 +828,9 @@ void StridedSlice::StridedSliceCommonExecutor::execStridedSlice(const std::vecto size_t start = 0, end = 0; splitter(workAmount, nthr, ithr, start, end); - for (size_t iwork = start; iwork < end; ++iwork) + for (size_t iwork = start; iwork < end; ++iwork) { cpu_memcpy(&dstData[dstIndices[iwork]], &srcShiftedData[srcIndices[iwork]], lastDstDim); + } }); } @@ -805,8 +849,9 @@ void StridedSlice::StridedSliceCommonExecutor::execSliceScatter(const std::vecto size_t start = 0, end = 0; splitter(workAmount, nthr, ithr, start, end); - for (size_t iwork = start; iwork < end; ++iwork) + for (size_t iwork = start; iwork < end; ++iwork) { cpu_memcpy(&dstShiftedData[srcIndices[iwork]], &srcUpdates[dstIndices[iwork]], lastDstDim); + } }); } diff --git a/src/plugins/intel_cpu/src/nodes/string_tensor_pack.cpp b/src/plugins/intel_cpu/src/nodes/string_tensor_pack.cpp index 2f7db689e7a17f..32c1d403f94ef5 100644 --- a/src/plugins/intel_cpu/src/nodes/string_tensor_pack.cpp +++ b/src/plugins/intel_cpu/src/nodes/string_tensor_pack.cpp @@ -36,8 +36,9 @@ void StringTensorPack::getSupportedDescriptors() { } void StringTensorPack::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } ov::element::Type indicesPrecision = getOriginalInputPrecisionAtPort(0); addSupportedPrimDesc({{LayoutType::ncsp, indicesPrecision}, 
{LayoutType::ncsp, indicesPrecision}, diff --git a/src/plugins/intel_cpu/src/nodes/string_tensor_unpack.cpp b/src/plugins/intel_cpu/src/nodes/string_tensor_unpack.cpp index 9bfb7544470686..f7cdb19dd006ec 100644 --- a/src/plugins/intel_cpu/src/nodes/string_tensor_unpack.cpp +++ b/src/plugins/intel_cpu/src/nodes/string_tensor_unpack.cpp @@ -37,8 +37,9 @@ void StringTensorUnpack::getSupportedDescriptors() { } void StringTensorUnpack::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } addSupportedPrimDesc({{LayoutType::ncsp, ov::element::string}}, {{LayoutType::ncsp, ov::element::i32}, {LayoutType::ncsp, ov::element::i32}, diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 43a005b27cb450..1060aabf20551c 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -78,8 +78,9 @@ struct SubgraphKey { using namespace dnnl::impl::primitive_hashing; size_t seed = get_attr_hash(0, attrs); - for (const auto& shape : in_shapes) + for (const auto& shape : in_shapes) { seed = get_vector_hash(seed, shape); + } return seed; } @@ -122,8 +123,9 @@ struct SubgraphShapeInferResultKey { using namespace dnnl::impl::primitive_hashing; size_t seed = hash_combine(0, body_hash); - for (const auto& shape : in_shapes) + for (const auto& shape : in_shapes) { seed = get_vector_hash(seed, shape); + } return seed; } @@ -183,8 +185,9 @@ uint64_t Subgraph::getBodyHash(const std::shared_ptr& sn } void Subgraph::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } const std::set supportedPrecisions = {ov::element::f32, ov::element::i32, ov::element::bf16, ov::element::f16, ov::element::i8, ov::element::u8}; @@ -192,8 +195,9 @@ void Subgraph::initSupportedPrimitiveDescriptors() { bool dimRanksAreEqual = true; 
for (size_t i = 0; dimRanksAreEqual && i < inputShapes.size(); i++) { for (size_t j = 0; dimRanksAreEqual && j < outputShapes.size(); j++) { - if (inputShapes[i].getRank() != outputShapes[j].getRank()) + if (inputShapes[i].getRank() != outputShapes[j].getRank()) { dimRanksAreEqual = false; + } } } @@ -212,9 +216,10 @@ void Subgraph::initSupportedPrimitiveDescriptors() { dnnl::impl::utils::one_of(ndims, 3u, 4u, 5u) && dimRanksAreEqual && !isOnlyPlanarApplicable && !isDynamic; for (const auto& inShape : inputShapes) { - if (isDynamic && inShape.getRank() != 1) + if (isDynamic && inShape.getRank() != 1) { isBlockedApplicable = isBlockedApplicable && inShape.getMinDims()[1] != Shape::UNDEFINED_DIM && inShape.getMinDims()[1] > 1; + } } #endif @@ -275,8 +280,9 @@ void Subgraph::initSupportedPrimitiveDescriptors() { subgraph_attrs->snippet->has_domain_sensitive_ops()) ? context->getConfig().inferencePrecision : originalInputPrecision; - if (supportedPrecisions.count(precision) == 0) + if (supportedPrecisions.count(precision) == 0) { OPENVINO_THROW("Subgraph node with name `", getName(), "` doesn't support ", precision, " precision."); + } const auto equalPrecisions = getOriginalOutputPrecisions().size() == 1 && precision == getOriginalOutputPrecisionAtPort(0); @@ -294,8 +300,9 @@ void Subgraph::initSupportedPrimitiveDescriptors() { config.outConfs.resize(outputShapes.size()); for (size_t i = 0; i < outputShapes.size(); i++) { auto precision = getOriginalOutputPrecisionAtPort(i); - if (supportedPrecisions.count(precision) == 0) + if (supportedPrecisions.count(precision) == 0) { OPENVINO_THROW("Subgraph node with name `", getName(), "` doesn't support ", precision, " precision."); + } BlockedMemoryDesc::CmpMask outputMask = BlockedMemoryDesc::SKIP_OFFSET_MASK; PortConfig portConfig; @@ -323,10 +330,12 @@ void Subgraph::initSupportedPrimitiveDescriptors() { return {config, impl_type}; }; - if (isChannelsFirstApplicable) + if (isChannelsFirstApplicable) { 
supportedPrimitiveDescriptors.emplace_back(initDesc(ChannelsFirst)); - if (isBlockedApplicable) + } + if (isBlockedApplicable) { supportedPrimitiveDescriptors.emplace_back(initDesc(Blocked)); + } supportedPrimitiveDescriptors.emplace_back(initDesc(Planar)); } @@ -367,10 +376,12 @@ void Subgraph::createPrimitive() { void Subgraph::initMemoryPtrs() { srcMemPtrs.resize(input_num); dstMemPtrs.resize(output_num); - for (size_t i = 0; i < input_num; i++) + for (size_t i = 0; i < input_num; i++) { srcMemPtrs[i] = getSrcMemoryAtPort(i); - for (size_t i = 0; i < output_num; i++) + } + for (size_t i = 0; i < output_num; i++) { dstMemPtrs[i] = getDstMemoryAtPort(i); + } } void Subgraph::initAttributes() { @@ -400,10 +411,12 @@ void Subgraph::initStartOffsets() { }; start_offset_in.resize(input_num); start_offset_out.resize(output_num); - for (size_t i = 0; i < input_num; i++) + for (size_t i = 0; i < input_num; i++) { start_offset_in[i] = get_offset(srcMemPtrs[i]->getDescWithType()); - for (size_t i = 0; i < output_num; i++) + } + for (size_t i = 0; i < output_num; i++) { start_offset_out[i] = get_offset(dstMemPtrs[i]->getDescWithType()); + } } snippets::op::Subgraph::BlockedShapeVector Subgraph::getSnippetsBlockedShapes() const { @@ -424,17 +437,20 @@ std::pair, std::vector> Subgra std::pair, std::vector> precisions; precisions.first.reserve(input_num); precisions.second.reserve(output_num); - for (const auto& p : subgraph_attrs->inMemPrecs) + for (const auto& p : subgraph_attrs->inMemPrecs) { precisions.first.push_back(p); - for (const auto& p : subgraph_attrs->outMemPrecs) + } + for (const auto& p : subgraph_attrs->outMemPrecs) { precisions.second.push_back(p); + } return precisions; } void Subgraph::initPluginBlockedShapes() const { in_shapes.resize(input_num); - for (size_t i = 0; i < srcMemPtrs.size(); i++) + for (size_t i = 0; i < srcMemPtrs.size(); i++) { in_shapes[i] = srcMemPtrs[i]->getDescWithType()->getBlockDims(); + } } Subgraph::DataFlowPasses 
Subgraph::getDataFlowPasses() { @@ -556,8 +572,9 @@ uint32_t Subgraph::getBroadcastingMask(const std::vector& input_shap for (const auto& broadcastable_input : broadcastable_inputs) { const auto& shape = input_shapes[broadcastable_input.first]; mask = mask << 1; - if (*(shape.rbegin() + broadcastable_input.second) == 1) + if (*(shape.rbegin() + broadcastable_input.second) == 1) { mask = mask | 1; + } } return mask; } @@ -582,8 +599,9 @@ void Subgraph::optimizeIR() { // TODO: Snippets don't support backend-provided blocking, so we need to reshape body // using blocked shapes first. This can be removed after [121670] std::vector in_shapes; - for (const auto& s : in_blocked_shapes) + for (const auto& s : in_blocked_shapes) { in_shapes.emplace_back(s.first); + } subgraph->shape_infer(in_shapes); const auto control_flow_config = std::make_shared(); @@ -673,8 +691,9 @@ void Subgraph::prepareParams() { } IShapeInfer::Result Subgraph::shapeInfer() const { - for (size_t i = 0; i < srcMemPtrs.size(); i++) + for (size_t i = 0; i < srcMemPtrs.size(); i++) { in_shapes[i] = srcMemPtrs[i]->getDescWithType()->getBlockDims(); + } auto builder = [this](const SubgraphShapeInferResultKey& key) -> std::shared_ptr { return std::make_shared(Node::shapeInfer()); @@ -695,15 +714,17 @@ bool Subgraph::canBeInPlace() const { for (auto& parentEdge : getParentEdges()) { auto parent = parentEdge.lock()->getParent(); - if (parent->getChildEdges().size() != 1) + if (parent->getChildEdges().size() != 1) { return false; + } // WA to prevent memory corruption caused by inplace feature if (parent->getType() == Type::Concatenation) { for (auto& parentParentEdge : parent->getParentEdges()) { auto parentParent = parentParentEdge.lock()->getParent(); - if (parentParent->getChildEdges().size() != 1) + if (parentParent->getChildEdges().size() != 1) { return false; + } } } } diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 
cffde3a81d23dd..22bac0edf9542d 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -63,8 +63,9 @@ static void redefineToMemories(const std::vector& to_mems, const Memo // this method get all memory ptrs of childs of one port to redefine descs for them static std::vector getToMemories(const Node* node, const size_t port) { std::vector memories; - for (auto& edge : node->getChildEdgesAtPort(port)) + for (auto& edge : node->getChildEdgesAtPort(port)) { memories.push_back(edge->getMemoryPtr()); + } return memories; } @@ -234,11 +235,12 @@ DynamicBuffer::DynamicBuffer(MemoryPtr from_, std::vector to_, const elem_size(DnnlExtensionUtils::sizeOfDataType(from->getDataType())) {} void DynamicBuffer::execute(const dnnl::engine& eng, const int iter) { - if (from->getStaticDims()[map_rule.axis] != static_cast(std::abs(map_rule.stride))) + if (from->getStaticDims()[map_rule.axis] != static_cast(std::abs(map_rule.stride))) { OPENVINO_THROW("TensorIterator (Loop) has incorrect output shape[axis] after iteration for concatenation. 
", std::abs(map_rule.stride), " is expected, but actual: ", from->getStaticDims()[map_rule.axis]); + } if (iter == 0) { init(eng); @@ -265,7 +267,8 @@ void DynamicBuffer::init(const dnnl::engine& eng) { const auto& src_mem = from->getPrimitive(); const auto& src_desc = src_mem.get_desc(); const auto& dims = src_desc.get_dims(); - count = std::accumulate(dims.begin(), dims.begin() + map_rule.axis, size_t(1), std::multiplies()); + count = + std::accumulate(dims.begin(), dims.begin() + map_rule.axis, static_cast(1), std::multiplies()); len = std::accumulate(dims.begin() + map_rule.axis + 1, dims.end(), elem_size, std::multiplies()); chunk_unit_in_byte = abs_stride * len; @@ -282,11 +285,13 @@ void DynamicBuffer::init(const dnnl::engine& eng) { bool DynamicBuffer::check_buffer() { if (map_rule.stride > 0) { - if (static_cast(chunk_offset_in_byte + chunk_unit_in_byte) > chunk_stride_in_byte) + if (static_cast(chunk_offset_in_byte + chunk_unit_in_byte) > chunk_stride_in_byte) { return true; + } } else { - if (chunk_offset_in_byte < 0) + if (chunk_offset_in_byte < 0) { return true; + } } return false; } @@ -295,8 +300,9 @@ MemoryPtr DynamicBuffer::create_buffer(const dnnl::engine& eng) { const auto abs_stride = std::abs(map_rule.stride); const auto estimate_iters = [&]() { - if (max_iter_count != -1) + if (max_iter_count != -1) { return max_iter_count; + } // in case of no idea of memory upper boundary return (num_execs == 0) ? 
1 : 2 * num_execs; // growth factor 2 @@ -540,22 +546,25 @@ void TensorIterator::getSupportedDescriptors() { } void TensorIterator::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } supportedPrimitiveDescriptors.emplace_back(make_plain_config(ngraphOp), impl_desc_type::unknown); } void TensorIterator::createPrimitive() { - if (loopBodyConditionOutputIdx == -1) + if (loopBodyConditionOutputIdx == -1) { continue_cond_check.reset(new staticValueCheck(true)); // always true + } if (loopExecutionConditionIdx == -1) { initial_cond_check.reset(new staticValueCheck(true)); lastUsedCond = initial_cond_check->getStatus(); } - if (runAsDynamic()) + if (runAsDynamic()) { prepareDynamicBuffers(); + } if (inputShapesDefined() && (getAlgorithm() == Algorithm::TensorIteratorLoop || needPrepareParams())) { constexpr bool compileStage = true; @@ -568,8 +577,10 @@ bool TensorIterator::needPrepareParams() const { if (getAlgorithm() == Algorithm::TensorIteratorLoop) { const auto tripCountPtr = getSrcDataAtPortAs(loopTripCountIdx); const auto condPtr = getSrcDataAtPortAs(loopExecutionConditionIdx); - if (tripCountPtr[0] != static_cast(lastUsedTripCount) || static_cast(condPtr[0]) != lastUsedCond) + if (tripCountPtr[0] != static_cast(lastUsedTripCount) || + static_cast(condPtr[0]) != lastUsedCond) { return true; + } } // If sliced input shapes of node and body input shapes aren't equal, we should reshape body @@ -614,8 +625,9 @@ void TensorIterator::prepareParamsImpl(const bool compileStage) { } // reset local states of DynamicBuffer - for (auto& buffer : buffers) + for (auto& buffer : buffers) { buffer->reset(lastUsedTripCount); + } } } @@ -632,14 +644,16 @@ void TensorIterator::execute(const dnnl::stream& strm) { bool continue_cond = initial_cond_check->getStatus(); int max_num_iter = trip_count_check->getStatus(); - for (auto& mapper : first_mappers) + for (auto& mapper : first_mappers) { 
mapper.second->execute(strm); + } // use "i != max_num_iter" only to allow "-1" works like infinite loop for (int i = 0; i != max_num_iter && continue_cond; i++) { // copy data to subgraph iteration - for (auto& mapper : before_mappers) + for (auto& mapper : before_mappers) { mapper->execute(strm, i); + } sub_graph.Infer(); @@ -647,12 +661,14 @@ void TensorIterator::execute(const dnnl::stream& strm) { // copy data from subgraph iteration to outputs // or to the next iteration inputs - for (auto& mapper : after_mappers) + for (auto& mapper : after_mappers) { mapper->execute(strm, i); + } } - for (auto& mapper : last_mappers) + for (auto& mapper : last_mappers) { mapper->execute(strm); + } } void TensorIterator::executeDynamicImpl(const dnnl::stream& strm) { @@ -662,27 +678,32 @@ void TensorIterator::executeDynamicImpl(const dnnl::stream& strm) { bool continue_cond = initial_cond_check->getStatus(); int max_num_iter = trip_count_check->getStatus(); - for (auto& mapper : first_mappers) + for (auto& mapper : first_mappers) { mapper.second->execute(strm); + } // use "i != max_num_iter" only to allow "-1" works like infinite loop for (int i = 0; i != max_num_iter && continue_cond; i++) { // copy data to subgraph iteration - for (auto& mapper : before_mappers) + for (auto& mapper : before_mappers) { mapper->execute(strm, i); - for (auto& mapper : back_mappers) + } + for (auto& mapper : back_mappers) { mapper->execute(strm, i); + } sub_graph.Infer(); continue_cond = continue_cond_check->getStatus(); - for (auto& buffer : buffers) + for (auto& buffer : buffers) { buffer->execute(eng, i); + } // on the last iteration we shouldn't reshape body inputs and init back edges - if ((i + 1 != max_num_iter) && continue_cond) + if ((i + 1 != max_num_iter) && continue_cond) { prepareDynamicBackEdges(); + } } reshapeAndFillOutput(strm); @@ -697,12 +718,13 @@ void TensorIterator::prepareInputPorts() { auto& to_mem = input_mems[map_rule.to].front(); // first memory is enough to access the 
shared underlying physical memory - if (map_rule.axis == -1) + if (map_rule.axis == -1) { first_mappers.emplace(std::make_pair(map_rule.from, map_rule.to), std::make_shared(context->getParamsCache(), from_mem, to_mem)); - else + } else { before_mappers.emplace_back( std::make_shared(context->getParamsCache(), from_mem, to_mem, true, map_rule, eng)); + } } } @@ -712,16 +734,17 @@ void TensorIterator::prepareOutputPorts() { auto to_mem = getDstMemoryAtPort(map_rule.from); auto& from_mem = output_mem[map_rule.to]; - if (map_rule.axis == -1) + if (map_rule.axis == -1) { last_mappers.emplace_back( std::make_shared(context->getParamsCache(), from_mem, to_mem)); - else + } else { after_mappers.emplace_back(std::make_shared(context->getParamsCache(), from_mem, to_mem, false, map_rule, eng)); + } } } @@ -778,8 +801,9 @@ void TensorIterator::prepareInitialCond(const bool compileStage) { auto edge = getParentEdgeAt(loopExecutionConditionIdx); auto mem = edge->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); - if (IMPLICATION(compileStage, edge->getParent()->isConstant())) + if (IMPLICATION(compileStage, edge->getParent()->isConstant())) { lastUsedCond = initial_cond_check->getStatus(); + } } } @@ -803,8 +827,9 @@ void TensorIterator::prepareTripCount(const bool compileStage) { inline VectorDims sliced_input_dims(const MemoryPtr& mem, const int axis, const int stride) { auto dims = mem->getStaticDims(); - if (axis != -1) + if (axis != -1) { dims[axis] = abs(stride); + } return dims; } @@ -968,8 +993,9 @@ int TensorIterator::getNumIteration(const std::vector& inputPortMap, continue; } - if (dims[rule.axis] == Shape::UNDEFINED_DIM) + if (dims[rule.axis] == Shape::UNDEFINED_DIM) { continue; + } if (rule.from < 0 || rule.from >= static_cast(outputShapes.size())) { THROW_CPU_NODE_ERR(": Invalid \"from\" value: \"from\" = ", diff --git a/src/plugins/intel_cpu/src/nodes/tile.cpp b/src/plugins/intel_cpu/src/nodes/tile.cpp index 57bff9631cebde..f17bd951e61431 100644 --- 
a/src/plugins/intel_cpu/src/nodes/tile.cpp +++ b/src/plugins/intel_cpu/src/nodes/tile.cpp @@ -54,28 +54,32 @@ void Tile::getSupportedDescriptors() { const auto& vec_to_string = [](const std::vector& vec) -> std::string { std::string result = "["; for (size_t i = 0; i < vec.size(); i++) { - if (i) + if (i) { result += ", "; + } result += std::to_string(vec[i]); } return result; }; - if (getParentEdges().size() != 2) + if (getParentEdges().size() != 2) { THROW_CPU_NODE_ERR("has incorrect number of input edges. " "Expected: 2, Actual: ", getParentEdges().size()); - if (getChildEdges().empty()) + } + if (getChildEdges().empty()) { THROW_CPU_NODE_ERR("has no output edges."); + } const auto& dstDims0 = getOutputShapeAtPort(0).getDims(); for (size_t i = 1lu; i < outputShapes.size(); i++) { const auto& dstDims = getOutputShapeAtPort(i).getDims(); - if (dstDims.size() != dstDims0.size()) + if (dstDims.size() != dstDims0.size()) { THROW_CPU_NODE_ERR("has output edges 0 and ", i, " with different ranks: ", dstDims0.size(), " and ", dstDims.size()); + } for (size_t j = 0; j < dstDims0.size(); j++) { if (dstDims0[j] != dstDims[j]) { THROW_CPU_NODE_ERR("has output edges 0 and ", @@ -87,21 +91,24 @@ void Tile::getSupportedDescriptors() { } } } - if (constMap[TILE_REPEATS] && getInputShapeAtPort(TILE_INPUT).getRank() > getOutputShapeAtPort(0).getRank()) + if (constMap[TILE_REPEATS] && getInputShapeAtPort(TILE_INPUT).getRank() > getOutputShapeAtPort(0).getRank()) { THROW_CPU_NODE_ERR( " has incorrect input/output data shape rank. Input shape rank cannot be more than output shape rank. 
" "Actual input shape size: ", getInputShapeAtPort(TILE_INPUT).getRank(), ", output shape size: ", getOutputShapeAtPort(0).getRank()); + } - if (!isDynamicNode()) + if (!isDynamicNode()) { needPrepareParamsVar = true; + } } void Tile::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } supportedPrimitiveDescriptors = getSupportedConfigs(this, outputShapes.size()); } @@ -136,12 +143,14 @@ bool Tile::needShapeInfer() const { return true; } if (!constMap[TILE_REPEATS]) { - if (originRepeats.empty()) + if (originRepeats.empty()) { return true; + } const int32_t* repeatsData = getSrcDataAtPortAs(TILE_REPEATS); for (size_t i = 0lu; i < originRepeats.size(); i++) { - if (originRepeats[i] != static_cast(repeatsData[i])) + if (originRepeats[i] != static_cast(repeatsData[i])) { return true; + } } } needPrepareParamsVar = false; @@ -173,10 +182,12 @@ void Tile::plainExecute(const dnnl::stream& strm) { int m_inner_dim = 1; int m_outer_dim = 1; auto inDims = srcMemory.getStaticDims(); - for (int i = 0; i < axis; i++) + for (int i = 0; i < axis; i++) { m_outer_dim *= inDims[i]; - for (size_t i = axis; i < inDims.size(); i++) + } + for (size_t i = axis; i < inDims.size(); i++) { m_inner_dim *= inDims[i]; + } int MB = srcMemory.getStaticDims()[0]; if (axis > 0) { diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index ba1507c9b4b2e6..97c5b45388bf19 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -93,13 +93,15 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato bool shape_agnostic_alg = jcp_.algorithm == TopKAlgorithm::topk_heap_sort || (jcp_.algorithm == TopKAlgorithm::topk_bubble_sort && !jcp_.bubble_inplace); - if (!shape_agnostic_alg) + if (!shape_agnostic_alg) { mov(reg_table, l_table); + } data_type = 
DnnlExtensionUtils::ElementTypeToDataType(jcp_.precision); precision_in_reg = isFloatCompatible(data_type) ? ov::element::f32 : ov::element::i32; - if (!shape_agnostic_alg && jcp_.layout == TopKLayoutType::topk_blocked && jcp_.topk_innermost) + if (!shape_agnostic_alg && jcp_.layout == TopKLayoutType::topk_blocked && jcp_.topk_innermost) { blk_stride = jcp_.sort_stride * jcp_.blk_size; + } if (jcp_.mode_max) { cmp_flg = _cmp_lt_os; // if val[left] < val[right], set mask 1, swap @@ -109,8 +111,9 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato heap_cmp_flg = _cmp_lt_os; // max heap is used for min topk, if a < b, set mask 1, swap } - if (isa == cpu::x64::avx512_core) + if (isa == cpu::x64::avx512_core) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + } load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; @@ -123,8 +126,9 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato emit_emitters_data(); - if (!shape_agnostic_alg) + if (!shape_agnostic_alg) { prepare_idx_table(); + } } private: @@ -728,10 +732,11 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato mov(reg_prc, reg_dst); add(reg_prc, reg_offset); mov(reg_prc_idx, reg_dst_idx); - if (jcp_.data_size != sizeof(int)) + if (jcp_.data_size != sizeof(int)) { add(reg_prc_idx, reg_offset_idx); - else + } else { add(reg_prc_idx, reg_offset); + } load_scalar(xmm_val_l, ptr[reg_prc], data_type); load_scalar(xmm_idx_l, ptr[reg_prc_idx], memory::data_type::s32); add(reg_prc, jcp_.data_size); @@ -821,8 +826,9 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato if (is_valid_isa(cpu::x64::avx)) { vpcmpgtd(x1, x2, op); } else { - if (x1.getIdx() != x2.getIdx()) + if (x1.getIdx() != x2.getIdx()) { uni_vmovups(x1, x2); + } pcmpgtd(x1, op); } } @@ -1922,22 +1928,28 @@ TopK::TopK(const std::shared_ptr& op, const GraphContext::CPtr& contex 
vec_idx_seq.clear(); vec_idx_block.clear(); - if (inputShapes.size() != 2 || outputShapes.size() < 2) + if (inputShapes.size() != 2 || outputShapes.size() < 2) { THROW_CPU_NODE_ERR("gets incorrect number of input/output edges!"); + } - if (getInputShapeAtPort(TOPK_DATA).getRank() != getOutputShapeAtPort(TOPK_DATA).getRank()) + if (getInputShapeAtPort(TOPK_DATA).getRank() != getOutputShapeAtPort(TOPK_DATA).getRank()) { THROW_CPU_NODE_ERR("gets incorrect number of input/output dimensions!"); + } - if (getInputShapeAtPort(TOPK_K).getRank() != 1) + if (getInputShapeAtPort(TOPK_K).getRank() != 1) { THROW_CPU_NODE_ERR("gets incorrect index vector dimension! Index vector should be 1 dimension."); + } - if (out_dims != out_idx_dims) + if (out_dims != out_idx_dims) { THROW_CPU_NODE_ERR("gets incorrect output tensor dimension sizes!"); + } - if (axis < 0) + if (axis < 0) { axis += in_dims_size; - if (axis < 0 || axis >= static_cast(in_dims_size)) + } + if (axis < 0 || axis >= static_cast(in_dims_size)) { THROW_CPU_NODE_ERR("gets incorrect input parameters dimensions and axis number!"); + } } else { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } @@ -1946,8 +1958,9 @@ TopK::TopK(const std::shared_ptr& op, const GraphContext::CPtr& contex void TopK::getSupportedDescriptors() {} void TopK::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_core)) { @@ -1973,8 +1986,9 @@ void TopK::initSupportedPrimitiveDescriptors() { ov::element::u8}; ov::element::Type dataPrecision = getOriginalOutputPrecisionAtPort(TOPK_DATA); - if (dataPrecision == ov::element::bf16 && !mayiuse(avx512_core)) + if (dataPrecision == ov::element::bf16 && !mayiuse(avx512_core)) { THROW_CPU_NODE_ERR("gets incorrect isa for BF16! 
AVX512 must be supported!"); + } bool precisionSupported = std::find(std::begin(supportedPrecision), std::end(supportedPrecision), dataPrecision) != std::end(supportedPrecision); if (!precisionSupported) { @@ -2043,20 +2057,24 @@ void TopK::preset_params() { void TopK::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(TOPK_DATA); auto srcMemPtr = getSrcMemoryAtPort(TOPK_DATA); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr || !dstMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined destination memory."); - if (!srcMemPtr || !srcMemPtr->isDefined()) + } + if (!srcMemPtr || !srcMemPtr->isDefined()) { THROW_CPU_NODE_ERR("has undefined input memory."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { THROW_CPU_NODE_ERR("has nullable preferable primitive descriptor"); + } src_dims = srcMemPtr->getDesc().getShape().getDims(); dst_dims = dstMemPtr->getDesc().getShape().getDims(); if (isDynamicNode()) { const int src_k = getSrcDataAtPortAs(TOPK_K)[0]; - if (static_cast(src_k) > src_dims[axis]) + if (static_cast(src_k) > src_dims[axis]) { THROW_CPU_NODE_ERR("gets top_k out of range!"); + } if (top_k != src_k) { top_k = src_k; } @@ -2118,8 +2136,9 @@ void TopK::prepareParams() { } else { // reference mode int j; for (j = src_dims.size() - 1; j >= 0; j--) { - if (src_dims[j] != 1) + if (src_dims[j] != 1) { break; + } } dim = static_cast(src_dims[axis]); before_num = count(src_dims, 0, axis); @@ -2137,8 +2156,9 @@ void TopK::createPrimitive() { } if (!isDynamicNode() && isExecutable()) { - if (needPrepareParams()) + if (needPrepareParams()) { prepareParams(); + } updateLastInputDims(); } @@ -2189,8 +2209,9 @@ void TopK::createPrimitive() { topk_kernel.reset(new jit_uni_topk_kernel_f32(jcp)); } - if (topk_kernel) + if (topk_kernel) { topk_kernel->create_ker(); + } #endif } } @@ -2427,24 +2448,27 @@ void TopK::calc_dims_size(const VectorDims& layout_dims) { layout_axis = axis == 0 ? 
0 : (axis == 1 ? static_cast(layout_dims.size() - 1) : axis - 1); } - for (int i = 0; i < layout_axis; i++) + for (int i = 0; i < layout_axis; i++) { O *= layout_dims[i]; - for (size_t i = layout_axis + 1; i < layout_dims.size(); i++) + } + for (size_t i = layout_axis + 1; i < layout_dims.size(); i++) { I *= layout_dims[i]; + } if (layout == TopKLayoutType::topk_blocked && topk_innermost) { I /= blk_size; } } void TopK::topk_ref(const float* in_ptr, float* out_ptr, int32_t* dst_idx) { - if (mode_max) + if (mode_max) { topk_ref_process(in_ptr, out_ptr, dst_idx, src_dims, [](float x, float y) -> float { return x > y; }); - else + } else { topk_ref_process(in_ptr, out_ptr, dst_idx, src_dims, [](float x, float y) -> float { return x < y; }); + } } void TopK::topk_ref_process(const float* src_data, @@ -2487,10 +2511,11 @@ void TopK::topk_ref_process(const float* src_data, max_values[top_k] = src_data[s_index]; max_indexes[top_k] = i2; for (int i3 = top_k; i3 > 0; i3--) { - if (compare(max_values[i3], max_values[i3 - 1])) + if (compare(max_values[i3], max_values[i3 - 1])) { swap_func(i3, i3 - 1); - else + } else { break; + } } s_index += after_num; } @@ -2504,20 +2529,23 @@ void TopK::topk_ref_process(const float* src_data, } } if (dst_data) { - for (int i2 = 0; i2 < top_k; i2++) + for (int i2 = 0; i2 < top_k; i2++) { dst_data[i0 * top_k * after_num + i2 * after_num + i1] = max_values[i2]; + } } if (dst_idx) { - for (int i2 = 0; i2 < top_k; i2++) + for (int i2 = 0; i2 < top_k; i2++) { dst_idx[i0 * top_k * after_num + i2 * after_num + i1] = max_indexes[i2]; + } } }); } inline int TopK::count(const VectorDims& dims, size_t start_ind, size_t end_ind) { size_t count = 1; - for (size_t i = start_ind; i < end_ind; i++) + for (size_t i = start_ind; i < end_ind; i++) { count *= dims[i]; + } return static_cast(count); } diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 0b253d4b83892a..e7d4ae61a9b177 100644 --- 
a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -59,8 +59,9 @@ Transpose::Transpose(const std::shared_ptr& op, const GraphContext::CP void Transpose::getSupportedDescriptors() {} void Transpose::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) + if (!supportedPrimitiveDescriptors.empty()) { return; + } prec = getOriginalInputPrecisionAtPort(0); @@ -135,8 +136,9 @@ bool Transpose::needPrepareParams() const { } void Transpose::prepareParams() { - if (isOptimized) + if (isOptimized) { return; + } if (performAsReorder) { // Transpose(order={0,3,1,2}) can be performed as Reorder(acdb=>abcd) @@ -196,17 +198,21 @@ void Transpose::prepareParams() { } void Transpose::createPrimitive() { - if (isOptimized) + if (isOptimized) { return; + } auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(INPUT_DATA_IDX); - if (!dstMemPtr) + if (!dstMemPtr) { OPENVINO_THROW("Destination memory is null."); - if (!srcMemPtr) + } + if (!srcMemPtr) { OPENVINO_THROW("Input memory is null."); - if (getSelectedPrimitiveDescriptor() == nullptr) + } + if (getSelectedPrimitiveDescriptor() == nullptr) { OPENVINO_THROW("Preferable primitive descriptor was not set."); + } if (getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && @@ -224,8 +230,9 @@ void Transpose::createPrimitive() { if (!performAsReorder) { transposeParams.permuteParams.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc()->getPrecision().size(); - if (isInputOrderConst) + if (isInputOrderConst) { transposeParams.permuteParams.order = order; + } auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDescWithType(); transposeParams.permuteParams.src_block_order = srcDesc->getOrder(); auto dstDesc = getChildEdgeAt(0)->getMemory().getDescWithType(); @@ -239,8 +246,9 @@ void 
Transpose::createPrimitive() { } void Transpose::execute(const dnnl::stream& strm) { - if (isOptimized) + if (isOptimized) { return; + } if (prim) { prim.execute(strm, primArgs); diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 5a5888090ef6ee..c3dedbbb97859a 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -38,8 +38,9 @@ Unique::Unique(const std::shared_ptr& op, const GraphContext::CPtr& co OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); } - if (!one_of(op->get_input_size(), 1u, 2u) || op->get_output_size() != 4) + if (!one_of(op->get_input_size(), 1u, 2u) || op->get_output_size() != 4) { THROW_CPU_NODE_ERR("has incorrect number of input/output edges."); + } for (int i = 0; i < 4; i++) { definedOutputs[i] = !op->get_output_target_inputs(i).empty(); diff --git a/src/plugins/intel_cpu/src/onednn/dnnl.cpp b/src/plugins/intel_cpu/src/onednn/dnnl.cpp index 30f2a80310711a..51d8d39b285a30 100644 --- a/src/plugins/intel_cpu/src/onednn/dnnl.cpp +++ b/src/plugins/intel_cpu/src/onednn/dnnl.cpp @@ -138,7 +138,7 @@ unsigned get_cache_size(int level, bool per_core) { return dnnl::impl::cpu::platform::get_per_core_cache_size(level); } - if (level > 0 && (unsigned)level <= cpu().getDataCacheLevels()) { + if (level > 0 && static_cast(level) <= cpu().getDataCacheLevels()) { unsigned l = level - 1; return cpu().getDataCacheSize(l); } else { diff --git a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp index d5af8ebef90c53..eb6fabe65a03b2 100644 --- a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp +++ b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp @@ -74,8 +74,9 @@ impl_desc_type parse_impl_name(std::string impl_desc_name) { #undef SEARCH_WORD_2 #undef SEARCH_WORD // Deconv case would set both jit and any in onednn, only set the jit bit. 
- if ((res & jit) && (res & any)) + if ((res & jit) && (res & any)) { res = static_cast(res & ~any); + } return res; } diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 6194438c928068..82342479aa9d77 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -55,9 +55,11 @@ static std::string getDeviceFullName() { __cpuid(regs[0], regs[0], regs[1], regs[2], regs[3]); # endif char* ch = reinterpret_cast(&regs[0]); - for (size_t j = 0; j < sizeof(regs); j++) - if (ch[j] != '\0') + for (size_t j = 0; j < sizeof(regs); j++) { + if (ch[j] != '\0') { brand_string += ch[j]; + } + } } #else # error "Unkown CPU architecture. Please, add support to openvino/core/visibility.hpp" @@ -196,12 +198,14 @@ void Plugin::calculate_streams(Config& conf, const std::shared_ptr& m static Config::ModelType getModelType(const std::shared_ptr& model) { if (op::util::has_op_with_type(model) || - op::util::has_op_with_type(model)) + op::util::has_op_with_type(model)) { return Config::ModelType::CNN; + } if ((op::util::has_op_with_type(model) && model->get_variables().size() > 0) || - op::util::has_op_with_type(model)) + op::util::has_op_with_type(model)) { return Config::ModelType::LLM; + } return Config::ModelType::Unknown; } @@ -314,7 +318,7 @@ void Plugin::set_property(const ov::AnyMap& config) { ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const { if (name == ov::optimal_number_of_infer_requests) { const auto streams = engConfig.streamExecutorConfig.get_streams(); - return decltype(ov::optimal_number_of_infer_requests)::value_type( + return static_cast( streams); // ov::optimal_number_of_infer_requests has no negative values } else if (name == ov::num_streams) { const auto streams = engConfig.streamExecutorConfig.get_streams(); @@ -324,20 +328,20 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) return
decltype(ov::device::id)::value_type{engConfig.device_id}; } else if (name == ov::inference_num_threads) { const auto threads = engConfig.streamExecutorConfig.get_threads(); - return decltype(ov::inference_num_threads)::value_type(threads); + return static_cast(threads); } else if (name == ov::enable_profiling.name()) { const bool perfCount = engConfig.collectPerfCounters; - return decltype(ov::enable_profiling)::value_type(perfCount); + return static_cast(perfCount); } else if (name == ov::hint::inference_precision) { return decltype(ov::hint::inference_precision)::value_type(engConfig.inferencePrecision); } else if (name == ov::hint::performance_mode) { return engConfig.hintPerfMode; } else if (name == ov::hint::enable_cpu_pinning) { const bool pin_value = engConfig.enableCpuPinning; - return decltype(ov::hint::enable_cpu_pinning)::value_type(pin_value); + return static_cast(pin_value); } else if (name == ov::hint::enable_cpu_reservation) { const bool reserve_value = engConfig.enableCpuReservation; - return decltype(ov::hint::enable_cpu_reservation)::value_type(reserve_value); + return static_cast(reserve_value); } else if (name == ov::hint::scheduling_core_type) { const auto core_type = engConfig.schedulingCoreType; return core_type; @@ -346,9 +350,9 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) return distribution_policy; } else if (name == ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; - return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); + return static_cast(ht_value); } else if (name == ov::hint::num_requests) { - return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); + return static_cast(engConfig.hintNumRequests); } else if (name == ov::hint::execution_mode) { return engConfig.executionMode; } else if (name == ov::internal::compiled_model_runtime_properties.name()) { @@ -376,7 +380,7 @@ ov::Any Plugin::get_property(const 
std::string& name, const ov::AnyMap& options) } else if (name == ov::internal::exclusive_async_requests.name()) { return engConfig.exclusiveAsyncRequests; } else if (name == ov::hint::dynamic_quantization_group_size) { - return decltype(ov::hint::dynamic_quantization_group_size)::value_type( + return static_cast( engConfig.fcDynamicQuantizationGroupSize); } else if (name == ov::hint::kv_cache_precision) { return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision); @@ -385,9 +389,9 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) } else if (name == ov::value_cache_precision) { return decltype(ov::value_cache_precision)::value_type(engConfig.valueCachePrecision); } else if (name == ov::key_cache_group_size) { - return decltype(ov::key_cache_group_size)::value_type(engConfig.keyCacheGroupSize); + return static_cast(engConfig.keyCacheGroupSize); } else if (name == ov::value_cache_group_size) { - return decltype(ov::value_cache_group_size)::value_type(engConfig.valueCacheGroupSize); + return static_cast(engConfig.valueCacheGroupSize); } return get_ro_property(name, options); } @@ -464,13 +468,16 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (name == ov::device::capabilities) { std::vector capabilities; if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16) || - dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) { capabilities.push_back(ov::device::capability::BF16); - if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) + } + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) { capabilities.push_back(ov::device::capability::WINOGRAD); + } capabilities.push_back(ov::device::capability::FP32); - if (hasHardwareSupport(ov::element::f16)) + if (hasHardwareSupport(ov::element::f16)) { 
capabilities.push_back(ov::device::capability::FP16); + } capabilities.push_back(ov::device::capability::INT8); capabilities.push_back(ov::device::capability::BIN); capabilities.push_back(ov::device::capability::EXPORT_IMPORT); @@ -485,15 +492,15 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio std::vector cachingProperties = {ov::device::full_name}; return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties)); } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(engConfig.denormalsOptMode == - Config::DenormalsOptMode::DO_On); + return static_cast( + engConfig.denormalsOptMode == Config::DenormalsOptMode::DO_On); } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( + return static_cast( engConfig.fcSparseWeiDecompressionRate); } else if (name == ov::execution_devices) { return decltype(ov::execution_devices)::value_type{get_device_name()}; } else if (name == ov::device::type) { - return decltype(ov::device::type)::value_type(ov::device::Type::INTEGRATED); + return static_cast(ov::device::Type::INTEGRATED); } else if (name == ov::device::architecture) { #if defined(OPENVINO_ARCH_X86_64) return decltype(ov::device::architecture)::value_type{"intel64"}; diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/eltwise.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/eltwise.cpp index 9efd06c8283636..3126d841d99ccc 100644 --- a/src/plugins/intel_cpu/src/shape_inference/custom/eltwise.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/custom/eltwise.cpp @@ -29,8 +29,9 @@ Result EltwiseShapeInfer::infer(const std::vector output_shape.size()) { @@ -42,8 +43,9 @@ Result EltwiseShapeInfer::infer(const std::vectorget_axis(); auto dstShape = oneHot->get_output_partial_shape(0); int output_dims_size = dstShape.size(); - if (0 == 
output_dims_size) + if (0 == output_dims_size) { output_dims_size = 1; + } if (axis < 0) { axis += output_dims_size; } diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp index 5b893b3458e4fb..836bfa3359a32b 100644 --- a/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp @@ -30,8 +30,9 @@ class SDPAShapeInfer : public ShapeInferEmptyPads { present_v_dims[0] = beam_idx_dims[0]; present_v_dims[2] += query_dims[2]; // normal and fast path - if (present_v_dims[3] == query_dims[3]) + if (present_v_dims[3] == query_dims[3]) { return {{query_dims, present_v_dims, present_v_dims}, ShapeInferStatus::success}; + } // diff kv feature size auto output_dims = query_dims; @@ -54,8 +55,9 @@ class SDPAShapeInfer : public ShapeInferEmptyPads { } // normal and fast path - if (present_v_dims[3] == query_dims[3]) + if (present_v_dims[3] == query_dims[3]) { return {{output_dims, present_v_dims, present_v_dims}, ShapeInferStatus::success}; + } // diff kv feature size output_dims[3] = present_v_dims[3]; @@ -75,8 +77,9 @@ class SDPAShapeInfer : public ShapeInferEmptyPads { ShapeInferPtr SDPAShapeInferFactory::makeShapeInfer() const { if (auto sdpa = ov::as_type_ptr(m_op)) { const auto& config = sdpa->get_config(); - if (config.output_BLHxS == false) + if (config.output_BLHxS == false) { return std::make_shared(config); + } } // fallback to ngraph shape infer on non-perf-critical case return make_shape_inference(m_op); diff --git a/src/plugins/intel_cpu/src/shape_inference/static_dimension.cpp b/src/plugins/intel_cpu/src/shape_inference/static_dimension.cpp index 0f8b270151ac49..37f83762c1a98e 100644 --- a/src/plugins/intel_cpu/src/shape_inference/static_dimension.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/static_dimension.cpp @@ -68,8 +68,9 @@ StaticDimension StaticDimension::operator&(const 
StaticDimension& dim) const { } StaticDimension& StaticDimension::operator&=(const StaticDimension& dim) { - if (*this != dim) + if (*this != dim) { m_dimension = 0; + } return *this; } @@ -82,8 +83,9 @@ bool StaticDimension::same_scheme(const StaticDimension& dim) const { } bool StaticDimension::merge(StaticDimension& dst, const StaticDimension& d1, const StaticDimension& d2) { - if (d1 != d2) + if (d1 != d2) { return false; + } dst = d1; return true; } diff --git a/src/plugins/intel_cpu/src/shape_inference/static_shape.cpp b/src/plugins/intel_cpu/src/shape_inference/static_shape.cpp index 2f7c96e99a869e..dadcf08e14eebd 100644 --- a/src/plugins/intel_cpu/src/shape_inference/static_shape.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/static_shape.cpp @@ -19,8 +19,9 @@ template bool merge_into(StaticShape& dst, const T& src) { auto success = (dst.size() == src.size()); - for (size_t i = 0; success && (i < dst.size()); ++i) + for (size_t i = 0; success && (i < dst.size()); ++i) { success = StaticDimension::merge(dst[i], dst[i], src[i]); + } return success; } @@ -99,17 +100,20 @@ bool StaticShape::broadcast_merge_into(StaticShape& dst, // PDPD broadcast rule. int64_t axis = autob.m_axis; - if (src_rank > dst_rank || axis < -1) + if (src_rank > dst_rank || axis < -1) { return false; + } axis = (axis == -1) ? 
(dst_rank - src_rank) : axis; - if (src_rank + axis > dst_rank) + if (src_rank + axis > dst_rank) { return false; + } bool success = true; for (int64_t i = 0; i < src_rank; ++i) { - if (src[i].get_length() > dst[axis + i].get_length()) + if (src[i].get_length() > dst[axis + i].get_length()) { return false; + } success &= StaticDimension::broadcast_merge(dst[axis + i], dst[axis + i], src[i]); } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp index f4fe54e7d41f43..b90d023f1eacdd 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp @@ -44,10 +44,12 @@ void ov::intel_cpu::ScaledDotProductAttentionWithKVCache::validate_and_infer_typ const size_t batch_index = permute_axes.empty() ? 0 : permute_axes[0]; const size_t length_index = permute_axes.empty() ? q_ps.size() - 2 : permute_axes[permute_axes.size() - 2]; const size_t head_num_index = permute_axes.empty() ? 
q_ps.size() - 3 : permute_axes[permute_axes.size() - 3]; - if (past_k_ps.rank().is_static()) + if (past_k_ps.rank().is_static()) { NODE_VALIDATION_CHECK(this, q_ps.size() == past_k_ps.size()); - if (past_v_ps.rank().is_static()) + } + if (past_v_ps.rank().is_static()) { NODE_VALIDATION_CHECK(this, q_ps.size() == past_v_ps.size()); + } for (size_t i = 0; i < q_ps.size(); i++) { if (i == head_num_index) { if (q_ps[i].is_static() && past_v_ps[i].is_static()) { @@ -84,8 +86,9 @@ void ov::intel_cpu::ScaledDotProductAttentionWithKVCache::validate_and_infer_typ } } } - if (output_logits.rank().is_static() && past_v_ps.rank().is_static()) + if (output_logits.rank().is_static() && past_v_ps.rank().is_static()) { output_logits[output_logits.size() - 1] = past_v_ps[output_logits.size() - 1]; + } set_output_type(0, get_input_element_type(0), output_logits); set_output_type(1, get_input_element_type(input_num - 1), past_k_ps); set_output_type(2, get_input_element_type(input_num - 1), past_v_ps); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/submodel.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/submodel.cpp index b0ef9c9468b300..e23776b4458884 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/submodel.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/submodel.cpp @@ -16,10 +16,12 @@ SubModel::SubModel(const std::shared_ptr& body) : SubGraphOp() { SubModel::SubModel(const ov::OutputVector& args, const std::shared_ptr& body) : SubGraphOp(args) { SubGraphOp::set_function(body); constructor_validate_and_infer_types(); - for (size_t i = 0; i < body->get_parameters().size(); ++i) + for (size_t i = 0; i < body->get_parameters().size(); ++i) { m_input_descriptions[0].push_back(std::make_shared(i, i)); - for (size_t i = 0; i < body->get_output_size(); ++i) + } + for (size_t i = 0; i < body->get_output_size(); ++i) { m_output_descriptions[0].push_back(std::make_shared(i, i)); + } } 
SubModel::SubModel(const ov::NodeVector& args, const std::shared_ptr& body) diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp index 9f125d802dffba..bea56c1f9a33d8 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp @@ -24,8 +24,9 @@ ov::intel_cpu::AlignMatMulInputRanks::AlignMatMulInputRanks() { ov::matcher_pass_callback callback = [this](ov::pass::pattern::Matcher& m) { auto matmul = ov::as_type_ptr(m.get_match_root()); - if (!matmul || transformation_callback(matmul)) + if (!matmul || transformation_callback(matmul)) { return false; + } const auto& input0 = matmul->input_value(0); const auto& input1 = matmul->input_value(1); @@ -38,19 +39,22 @@ ov::intel_cpu::AlignMatMulInputRanks::AlignMatMulInputRanks() { const bool transposedUnsqueeze = input1shape.size() == 1; - if (input0shape.size() == input1shape.size() && input0shape.size() != 1) + if (input0shape.size() == input1shape.size() && input0shape.size() != 1) { return false; // nothing to do + } auto getUnsqueeze = [&](const ov::Output& nodeFrom, const ov::Output& nodeTo) { auto rankFrom = nodeFrom.get_partial_shape().size(); auto rankTo = nodeTo.get_partial_shape().size(); std::vector unsqueeze_axes; - for (int64_t j = 0; j < static_cast(rankTo - rankFrom); ++j) + for (int64_t j = 0; j < static_cast(rankTo - rankFrom); ++j) { unsqueeze_axes.push_back(j); + } - if (transposedUnsqueeze) // special case for one-dimensional second input + if (transposedUnsqueeze) { // special case for one-dimensional second input unsqueeze_axes[unsqueeze_axes.size() - 1]++; + } auto unsqueeze = std::make_shared( nodeFrom, @@ -89,15 +93,17 @@ ov::intel_cpu::AlignMatMulInputRanks::AlignMatMulInputRanks() { matmul_new_inputs[0] = 
unsqueezeInput0; new_ops.push_back(unsqueezeInput0); - if (input0shape.size() == 1) + if (input0shape.size() == 1) { matmul->set_transpose_a(false); + } } else if (input0shape.size() > input1shape.size()) { std::shared_ptr unsqueezeInput1 = getUnsqueeze(input1, input0); matmul_new_inputs[1] = unsqueezeInput1; new_ops.push_back(unsqueezeInput1); - if (input1shape.size() == 1) + if (input1shape.size() == 1) { matmul->set_transpose_b(false); + } } std::shared_ptr matmul_new = matmul->clone_with_new_inputs(matmul_new_inputs); @@ -117,10 +123,11 @@ ov::intel_cpu::AlignMatMulInputRanks::AlignMatMulInputRanks() { if (ov::is_scalar(output_shape) && can_squeeze_scalar) { squeeze_output = std::make_shared(matmul_new); } else { - if (input0shape.size() == 1) + if (input0shape.size() == 1) { squeeze_axis = new_out_shape_size - 2; - else if (input1shape.size() == 1) + } else if (input1shape.size() == 1) { squeeze_axis = new_out_shape_size - 1; + } squeeze_output = std::make_shared( matmul_new, ov::opset1::Constant::create(ov::element::i64, ov::Shape{1}, {squeeze_axis})); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp index e2bcac397af164..9c69728132c324 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp @@ -68,12 +68,16 @@ bool is_triu(ov::opset1::Constant* cmask, size_t rows, size_t columns) { const auto* ptr = reinterpret_cast(cmask->get_data_ptr()); for (size_t y = 0; y < rows; y++, ptr += columns) { size_t x; - for (x = 0; x <= y; x++) - if (ptr[x]) + for (x = 0; x <= y; x++) { + if (ptr[x]) { return false; - for (; x < columns; x++) - if (!ptr[x]) + } + } + for (; x < columns; x++) { + if (!ptr[x]) { return false; + } + } } return true; } @@ -210,20 
+214,24 @@ CausalMaskPreprocess::CausalMaskPreprocess() { auto triu = ov::as_type_ptr(pattern_map.find(const_triu)->second.get_node_shared_ptr()); auto triu_shape = triu->get_output_shape(0); - if (triu_shape.size() != 4) + if (triu_shape.size() != 4) { return false; - if (triu_shape[0] != 1 || triu_shape[1] != 1 || triu_shape[2] != triu_shape[3]) + } + if (triu_shape[0] != 1 || triu_shape[1] != 1 || triu_shape[2] != triu_shape[3]) { return false; + } if (!m_global_triu) { auto triu_dtype = triu->get_output_element_type(0); // check if it's triu if (triu_dtype == ov::element::i32) { - if (!is_triu(triu.get(), triu_shape[2], triu_shape[3])) + if (!is_triu(triu.get(), triu_shape[2], triu_shape[3])) { return false; + } } else if (triu_dtype == ov::element::u8) { - if (!is_triu(triu.get(), triu_shape[2], triu_shape[3])) + if (!is_triu(triu.get(), triu_shape[2], triu_shape[3])) { return false; + } } else { return false; } @@ -231,8 +239,9 @@ CausalMaskPreprocess::CausalMaskPreprocess() { m_global_triu = triu; } else { // check identity insread of values to save time - if (triu != m_global_triu) + if (triu != m_global_triu) { return false; + } } ov::OutputVector inputs{ diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_broadcast_to_tiles.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_broadcast_to_tiles.cpp index 0582be3bbf735e..29c33a42eee787 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_broadcast_to_tiles.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_broadcast_to_tiles.cpp @@ -26,8 +26,9 @@ ov::intel_cpu::ConvertBroadcastToTiles::ConvertBroadcastToTiles() { auto shape_node = ov::as_type_ptr(broadcast->input_value(1).get_node_shared_ptr()); auto axes_node = ov::as_type_ptr(broadcast->input_value(2).get_node_shared_ptr()); - if (!shape_node || !axes_node) + if (!shape_node || !axes_node) { return false; + } auto output_shape = 
shape_node->cast_vector(); auto input_shape = data_node.get_shape(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.cpp index 3ef8145cb424c0..29a5ddc1b57952 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.cpp @@ -69,8 +69,9 @@ ov::intel_cpu::ConvertFqRnnToQuantizedRnn::ConvertFqRnnToQuantizedRnn() { ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { auto rnn = m.get_match_root(); - if (!rnn || transformation_callback(rnn)) + if (!rnn || transformation_callback(rnn)) { return false; + } const auto& pattern_map = m.get_pattern_value_map(); const auto& activation = pattern_map.at(X_m); @@ -174,12 +175,14 @@ ov::intel_cpu::ConvertFqRnnToQuantizedRnn::ConvertFqRnnToQuantizedRnn() { const auto weights_scale_constant = ov::as_type_ptr(weights_scale_output.get_node_shared_ptr()); - if (!input_scale_constant || !weights_scale_constant) + if (!input_scale_constant || !weights_scale_constant) { return false; + } const float* input_scale_ptr = input_scale_constant->get_data_ptr(); - if (*input_scale_ptr == 0.f) + if (*input_scale_ptr == 0.f) { OPENVINO_THROW("Cannot handle zero input scale"); + } const float input_scale = 1 / *input_scale_ptr; std::vector weights_scales = weights_scale_constant->get_vector(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp index e28485b4c9cb1d..c32c6e0b6a8b97 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp +++ 
b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp @@ -157,8 +157,9 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { fc->set_friendly_name(matmul->get_friendly_name()); /// todo: CVS-130863 Remove after fp16_compression is copyable - if (ov::fp16_compression_is_disabled(matmul)) + if (ov::fp16_compression_is_disabled(matmul)) { disable_fp16_compression(fc); + } new_ops.push_back(fc); ov::copy_runtime_info(matmul, new_ops); ov::replace_node(matmul, fc); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_tile_to_seq_tiles.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_tile_to_seq_tiles.cpp index ced4e2bcd07043..699a6e810162d3 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_tile_to_seq_tiles.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_tile_to_seq_tiles.cpp @@ -25,15 +25,17 @@ ov::intel_cpu::ConvertTileToSeqTiles::ConvertTileToSeqTiles() { } auto tiles_node = ov::as_type_ptr(tile->input_value(1).get_node_shared_ptr()); - if (!tiles_node) + if (!tiles_node) { return false; + } auto tiles = tiles_node->cast_vector(); auto input_shape_rank = static_cast(tile->get_input_partial_shape(0).rank().get_length()); int64_t cur_dim_id = tiles.size() - 1; - if (tiles.size() != input_shape_rank) + if (tiles.size() != input_shape_rank) { return false; + } auto last_node = tile->input_value(0); auto friendly_name = tile->get_friendly_name(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp index 83002c53c4a56d..7696abdbc3df53 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp @@ -41,8 +41,9 @@ bool 
isConvertableToPowerStatic(const std::shared_ptr& node) { const int nonConstPort = 1 - constPort; auto input_rank = node->get_input_partial_shape(nonConstPort).rank(); - if (input_rank.is_dynamic()) + if (input_rank.is_dynamic()) { return false; + } auto const_shape = node->get_input_shape(constPort); return ov::shape_size(const_shape) == 1 && input_rank.get_length() >= static_cast(const_shape.size()) && !ov::intel_cpu::one_of(node->get_input_node_shared_ptr(nonConstPort)->get_type_info(), @@ -61,8 +62,9 @@ bool isConvertableToPowerStatic(const std::shared_ptr& node) { template <> bool isConvertableToPowerStatic(const std::shared_ptr& node) { auto input_rank = node->get_input_partial_shape(0).rank(); - if (input_rank.is_dynamic()) + if (input_rank.is_dynamic()) { return false; + } auto const_node = ov::as_type_ptr(node->get_input_node_shared_ptr(1)); return const_node && input_rank.get_length() >= static_cast(const_node->get_shape().size()) && @@ -135,20 +137,24 @@ ov::intel_cpu::ConvertToPowerStatic::ConvertToPowerStatic() { std::shared_ptr toReplace = node; if (auto power = ov::as_type_ptr(node)) { - if (!isConvertableToPowerStatic(power)) + if (!isConvertableToPowerStatic(power)) { return false; + } toReplace = convert(power); } else if (auto add = ov::as_type_ptr(node)) { - if (!isConvertableToPowerStatic(add)) + if (!isConvertableToPowerStatic(add)) { return false; + } toReplace = convert(add); } else if (auto sub = ov::as_type_ptr(node)) { - if (!isConvertableToPowerStatic(sub)) + if (!isConvertableToPowerStatic(sub)) { return false; + } toReplace = convert(sub); } else if (auto mult = ov::as_type_ptr(node)) { - if (!isConvertableToPowerStatic(mult)) + if (!isConvertableToPowerStatic(mult)) { return false; + } toReplace = convert(mult); } else { OPENVINO_THROW("ConvertToPowerStatic: op type is not supported"); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp 
b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp index bc883c82484c2b..d651436d565eca 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp @@ -72,8 +72,9 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() { const size_t out_channels_idx = with_transpose ? 2 : 1; expected_shape[out_channels_idx] = fc_input_shape[0]; const auto& node_shape = node->get_output_shape(0); - if (node_shape.size() > expected_shape.size()) + if (node_shape.size() > expected_shape.size()) { return false; + } const auto comparison_start_pos = expected_shape.size() - node_shape.size(); return std::equal(node_shape.begin(), node_shape.end(), expected_shape.begin() + comparison_start_pos) || @@ -83,20 +84,23 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() { }; const auto mul = reshape->get_input_node_shared_ptr(0); - if (!check_decompression_shape(mul->get_input_node_shared_ptr(1))) + if (!check_decompression_shape(mul->get_input_node_shared_ptr(1))) { return false; + } const auto mul_parent = mul->get_input_node_shared_ptr(0); const bool with_subtract = ov::is_type(mul_parent); - if (with_subtract && !check_decompression_shape(mul_parent->get_input_node_shared_ptr(1))) + if (with_subtract && !check_decompression_shape(mul_parent->get_input_node_shared_ptr(1))) { return false; + } const auto convert = with_subtract ? mul_parent->get_input_node_shared_ptr(0) : mul_parent; const auto weights = convert->get_input_node_shared_ptr(0); ov::Shape expected_weights_shape(3, 1); expected_weights_shape[1] = fc_input_shape[with_transpose ? 1 : 0]; expected_weights_shape[2] = fc_input_shape[with_transpose ? 
0 : 1]; - if (weights->get_output_shape(0) != expected_weights_shape) + if (weights->get_output_shape(0) != expected_weights_shape) { return false; + } auto squeeze_constant = [&](const std::shared_ptr& node) { const auto constant = ov::as_type_ptr(node); @@ -117,8 +121,9 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() { squeeze_constant(weights); if (with_subtract) { auto sub_const = mul_parent->get_input_node_shared_ptr(1); - if (ov::is_type(sub_const)) + if (ov::is_type(sub_const)) { sub_const = sub_const->get_input_node_shared_ptr(0); + } squeeze_constant(sub_const); } return true; diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ngram_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ngram_fusion.cpp index f9b1e4f2b2c053..a34fee71f7c213 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ngram_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/ngram_fusion.cpp @@ -103,8 +103,9 @@ ov::intel_cpu::NgramFusion::NgramFusion() { } // save symbol of cropped_shape and check it against first dimension of tokens shape cropped_shape_symbol = pattern_map.at(cropped_shape_m).get_tensor().get_value_symbol()[0]; - if (!symbol::are_equal(tokens_shape[0].get_symbol(), cropped_shape_symbol)) + if (!symbol::are_equal(tokens_shape[0].get_symbol(), cropped_shape_symbol)) { return false; + } } auto cropped_shape_symbol_match = [cropped_shape_symbol](const ov::Output& output) -> bool { @@ -181,8 +182,9 @@ ov::intel_cpu::NgramFusion::NgramFusion() { Matcher select_matcher(select_m); for (size_t i = 0; i < inputs.size(); ++i) { - if (i == as_is_idx) + if (i == as_is_idx) { continue; + } if (!select_matcher.match(inputs[i])) { return false; } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp index 
b595d1dbcf03ad..29c6e10dab9fc7 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp @@ -63,8 +63,9 @@ bool transform(const std::shared_ptr& sequenceOp) { ov::replace_node(sequenceOp->get_input_node_shared_ptr(0), reshape1); const auto& seqTargetInputs = sequenceOp->get_output_target_inputs(0); - if (seqTargetInputs.empty()) + if (seqTargetInputs.empty()) { return false; + } auto transposeAfter = seqTargetInputs.begin()->get_node()->shared_from_this(); auto lstmOutShape = ov::op::util::make_try_fold(sequenceOp->output(0)); @@ -95,8 +96,9 @@ ov::intel_cpu::OptimizeGRUSequenceTransposes::OptimizeGRUSequenceTransposes() { return false; } // Bidirectional cases are not supported - if (gruSequence->get_direction() == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL) + if (gruSequence->get_direction() == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL) { return false; + } return transform(gruSequence); }; @@ -115,8 +117,9 @@ ov::intel_cpu::OptimizeRNNSequenceTransposes::OptimizeRNNSequenceTransposes() { return false; } // Bidirectional cases are not supported - if (rnnSequence->get_direction() == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL) + if (rnnSequence->get_direction() == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL) { return false; + } return transform(rnnSequence); }; diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp index 8f44582ba89b01..d08ba911756794 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp @@ -136,8 +136,9 @@ StatefulSDPAFusion::StatefulSDPAFusion() { } } assign = ov::as_type(to_node); - if (assign) 
+ if (assign) { return true; + } } return false; }; @@ -150,8 +151,9 @@ StatefulSDPAFusion::StatefulSDPAFusion() { ov::op::v0::ShapeOf::get_type_info_static(), ov::op::v3::ShapeOf::get_type_info_static(), ov::op::v0::Convert::get_type_info_static(), - ov::op::v8::Gather::get_type_info_static())) + ov::op::v8::Gather::get_type_info_static())) { return false; + } } return true; }; @@ -207,8 +209,9 @@ StatefulSDPAFusion::StatefulSDPAFusion() { for (auto&& node : nodes) { if (pattern_map.count(node)) { auto p = pattern_map.at(node).get_node_shared_ptr(); - if (p->get_output_target_inputs(0).size() != 1) + if (p->get_output_target_inputs(0).size() != 1) { return false; + } } } return true; @@ -277,15 +280,17 @@ StatefulSDPAFusion::StatefulSDPAFusion() { new_node->set_friendly_name(old_node->get_friendly_name()); copy_runtime_info(old_node, new_node); ov::replace_node(old_node, {new_node->output(0)}); - if (assign_cvt_k_node) + if (assign_cvt_k_node) { assign_cvt_k_node->set_arguments({new_node->output(1)}); - else + } else { assign_k_node->set_arguments({new_node->output(1)}); + } - if (assign_cvt_v_node) + if (assign_cvt_v_node) { assign_cvt_v_node->set_arguments({new_node->output(2)}); - else + } else { assign_v_node->set_arguments({new_node->output(2)}); + } // Markup pattern: // ReadValue->Convert(Optional)->ScaledDotProductAttentionWithKVCache->Convert(Optional)->Assign, so that diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/swap_convert_transpose.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/swap_convert_transpose.cpp index aedd17f7ead1f0..ac13ce9c8a8a54 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/swap_convert_transpose.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/swap_convert_transpose.cpp @@ -27,8 +27,9 @@ ov::intel_cpu::SwapConvertTranspose::SwapConvertTranspose() { auto convert = pattern_map.at(convert_m).get_node_shared_ptr(); auto transpose = 
pattern_map.at(transpose_m).get_node_shared_ptr(); - if (convert->get_output_target_inputs(0).size() != 1) + if (convert->get_output_target_inputs(0).size() != 1) { return false; + } ov::OutputVector transposeInputs = transpose->input_values(); transposeInputs[0] = convert->input_value(0); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/op/llm_mlp.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/op/llm_mlp.cpp index 91e17eab52f730..70b930ec57ebed 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/op/llm_mlp.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/op/llm_mlp.cpp @@ -38,10 +38,12 @@ void LLMMLPNode::validate_and_infer_types() { INTERNAL_OP_SCOPE(LLMMLPNode_validate_and_infer_types); const auto input_size = get_input_size(); size_t expect_input_size = 4; - if (m_config.gate_up_quantized) + if (m_config.gate_up_quantized) { expect_input_size += 2; - if (m_config.down_quantized) + } + if (m_config.down_quantized) { expect_input_size += 1; + } NODE_VALIDATION_CHECK(this, input_size == expect_input_size); const auto& ishape = get_input_partial_shape(0); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_to_interaction.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_to_interaction.cpp index 4ca55201b93b6a..6973d86d1318d4 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_to_interaction.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_to_interaction.cpp @@ -103,8 +103,9 @@ ov::intel_cpu::FuseFQtoInteraction::FuseFQtoInteraction() { std::vector fq_scale; if (fq_node) { fq_scale = simplifyToScale(fq_node, 0.001f); - if (fq_scale.empty()) + if (fq_scale.empty()) { return false; + } } bool success = ov::replace_output_update_name(fq_node->output(0), fq_node->input_value(0)); if (!success) { diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.cpp 
b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.cpp index 4eeb59fc976564..89987fabd606a5 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.cpp @@ -63,14 +63,18 @@ ov::intel_cpu::MHAFloatFusion::MHAFloatFusion() { return false; } - if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) + if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) + } + if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; + } std::vector mul_scales; if (auto mul_node = ov::as_type_ptr(pattern_to_output.at(mul).get_node_shared_ptr())) { @@ -86,14 +90,17 @@ ov::intel_cpu::MHAFloatFusion::MHAFloatFusion() { } auto matmul0_node = ov::as_type_ptr(pattern_to_output.at(matmul0).get_node_shared_ptr()); - if (!matmul0_node) + if (!matmul0_node) { return false; - if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) + } + if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) { return false; + } auto reshape0_node = ov::as_type_ptr(pattern_to_output.at(reshape0).get_node_shared_ptr()); - if (!reshape0_node) + if (!reshape0_node) { return false; + } if (auto reshape_pattern = ov::as_type_ptr(pattern_to_output.at(in6).get_node_shared_ptr())) { @@ -123,16 +130,20 @@ 
ov::intel_cpu::MHAFloatFusion::MHAFloatFusion() { } auto softmax_node = ov::as_type_ptr(pattern_to_output.at(softmax).get_node_shared_ptr()); - if (!softmax_node) + if (!softmax_node) { return false; - if (softmax_node->get_axis() != 1) + } + if (softmax_node->get_axis() != 1) { return false; + } auto matmul1_node = ov::as_type_ptr(pattern_to_output.at(matmul1).get_node_shared_ptr()); - if (!matmul1_node) + if (!matmul1_node) { return false; - if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) + } + if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) { return false; + } bool is_mul_first = true; auto transpose3_node = pattern_to_output.at(transpose3).get_node_shared_ptr(); @@ -216,32 +227,42 @@ ov::intel_cpu::MHAFloatFusion2::MHAFloatFusion2() { return false; } - if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) + if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) + } + if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; + } auto matmul0_node = ov::as_type_ptr(pattern_to_output.at(matmul0).get_node_shared_ptr()); - if (!matmul0_node) + if (!matmul0_node) { return false; - if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) + } + if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) { return false; + } auto softmax_node = 
ov::as_type_ptr(pattern_to_output.at(softmax).get_node_shared_ptr()); - if (!softmax_node) + if (!softmax_node) { return false; - if (softmax_node->get_axis() != 3) + } + if (softmax_node->get_axis() != 3) { return false; + } auto matmul1_node = ov::as_type_ptr(pattern_to_output.at(matmul1).get_node_shared_ptr()); - if (!matmul1_node) + if (!matmul1_node) { return false; - if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) + } + if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) { return false; + } auto transpose3_node = pattern_to_output.at(transpose3).get_node_shared_ptr(); auto mha = std::make_shared(transpose0_in, @@ -356,33 +377,41 @@ ov::intel_cpu::MHAQuantFusion::MHAQuantFusion() { return false; } - if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) + if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) + } + if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; + } auto matmul0_node = ov::as_type_ptr(pattern_to_output.at(matmul0).get_node_shared_ptr()); - if (!matmul0_node) + if (!matmul0_node) { return false; - if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) + } + if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) { return false; + } std::vector fq0_scale; auto fq0_node = 
ov::as_type_ptr(pattern_to_output.at(fakeQuantize0).get_node_shared_ptr()); if (fq0_node) { fq0_scale = simplifyToScale(fq0_node); - if (!fq0_scale.size()) + if (!fq0_scale.size()) { return false; + } } auto reshape0_node = ov::as_type_ptr(pattern_to_output.at(reshape0).get_node_shared_ptr()); - if (!reshape0_node) + if (!reshape0_node) { return false; + } if (auto reshape_pattern = ov::as_type_ptr(pattern_to_output.at(in6).get_node_shared_ptr())) { @@ -412,34 +441,40 @@ ov::intel_cpu::MHAQuantFusion::MHAQuantFusion() { } auto softmax_node = ov::as_type_ptr(pattern_to_output.at(softmax).get_node_shared_ptr()); - if (!softmax_node) + if (!softmax_node) { return false; - if (softmax_node->get_axis() != 1) + } + if (softmax_node->get_axis() != 1) { return false; + } std::vector fq1_scale; auto fq1_node = ov::as_type_ptr(pattern_to_output.at(fakeQuantize1).get_node_shared_ptr()); if (fq1_node) { fq1_scale = simplifyToScale(fq1_node); - if (!fq1_scale.size()) + if (!fq1_scale.size()) { return false; + } } else { return false; } auto matmul1_node = ov::as_type_ptr(pattern_to_output.at(matmul1).get_node_shared_ptr()); - if (!matmul1_node) + if (!matmul1_node) { return false; - if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) + } + if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) { return false; + } std::vector fq2_scale; if (auto fq_node = ov::as_type_ptr(pattern_to_output.at(fakeQuantize2).get_node_shared_ptr())) { fq2_scale = simplifyToScale(fq_node); - if (!fq2_scale.size()) + if (!fq2_scale.size()) { return false; + } } bool is_mul_first = false; @@ -562,37 +597,46 @@ ov::intel_cpu::MHAQuantFusion2::MHAQuantFusion2() { return false; } - if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) + if (!valid_transpose_order(pattern_to_output.at(in4).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 
1})) + } + if (!valid_transpose_order(pattern_to_output.at(in5).get_node_shared_ptr(), {0, 2, 3, 1})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in9).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; - if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) + } + if (!valid_transpose_order(pattern_to_output.at(in10).get_node_shared_ptr(), {0, 2, 1, 3})) { return false; + } auto matmul0_node = ov::as_type_ptr(pattern_to_output.at(matmul0).get_node_shared_ptr()); - if (!matmul0_node) + if (!matmul0_node) { return false; - if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) + } + if (matmul0_node->get_transpose_a() || matmul0_node->get_transpose_b()) { return false; + } std::vector fq0_scale; auto fq0_node = ov::as_type_ptr(pattern_to_output.at(fakeQuantize0).get_node_shared_ptr()); if (fq0_node) { fq0_scale = simplifyToScale(fq0_node); - if (!fq0_scale.size()) + if (!fq0_scale.size()) { return false; + } } else { return false; } auto softmax_node = ov::as_type_ptr(pattern_to_output.at(softmax).get_node_shared_ptr()); - if (!softmax_node) + if (!softmax_node) { return false; - if (softmax_node->get_axis() != 3) + } + if (softmax_node->get_axis() != 3) { return false; + } std::vector fq1_scale; const bool fakeQuantize1Exists = pattern_to_output.find(fakeQuantize1) != pattern_to_output.end(); @@ -600,16 +644,19 @@ ov::intel_cpu::MHAQuantFusion2::MHAQuantFusion2() { if (auto fq_node = ov::as_type_ptr( pattern_to_output.at(fakeQuantize1).get_node_shared_ptr())) { fq1_scale = simplifyToScale(fq_node); - if (!fq1_scale.size()) + if (!fq1_scale.size()) { return false; + } } } auto matmul1_node = ov::as_type_ptr(pattern_to_output.at(matmul1).get_node_shared_ptr()); - if (!matmul1_node) + if (!matmul1_node) { return false; - if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) + } 
+ if (matmul1_node->get_transpose_a() || matmul1_node->get_transpose_b()) { return false; + } bool is_mul_first = true; auto transpose3_node = pattern_to_output.at(transpose3).get_node_shared_ptr(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mlp_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mlp_fusion.cpp index d4988ce9f43337..ac09d05931054c 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mlp_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mlp_fusion.cpp @@ -153,8 +153,9 @@ ov::intel_cpu::MLPFusion::MLPFusion() { up_proj_w = pattern_map.at(up_proj_weight_i8); if (pattern_map.count(down_proj_weight_i8) > 0) { - if (pattern_map.count(down_proj_weight_scales_per_OC) == 0) + if (pattern_map.count(down_proj_weight_scales_per_OC) == 0) { return false; + } is_down_proj_int8 = true; down_proj_w = pattern_map.at(down_proj_weight_i8); } else { @@ -172,29 +173,37 @@ ov::intel_cpu::MLPFusion::MLPFusion() { // make sure that: // - shape of gate/up's weight is [down_size, up_size] // - shape of down's weight is [up_size, down_size] - if (!gate_proj_w_pshape.is_static()) + if (!gate_proj_w_pshape.is_static()) { return false; - if (!up_proj_w_pshape.is_static()) + } + if (!up_proj_w_pshape.is_static()) { return false; - if (!down_proj_w_pshape.is_static()) + } + if (!down_proj_w_pshape.is_static()) { return false; + } auto up_shape = up_proj_w_pshape.get_shape(); auto down_shape = down_proj_w_pshape.get_shape(); - if (gate_proj_w_pshape.get_shape() != up_shape) + if (gate_proj_w_pshape.get_shape() != up_shape) { return false; - if (up_shape.size() != 2) + } + if (up_shape.size() != 2) { return false; - if (down_shape.size() != 2) + } + if (down_shape.size() != 2) { return false; + } auto up_size = is_gate_up_combined ? 
(up_shape[0] / 2) : (up_shape[0]); auto down_size = up_shape[1]; - if (down_shape[0] != down_size) + if (down_shape[0] != down_size) { return false; - if (down_shape[1] != up_size) + } + if (down_shape[1] != up_size) { return false; + } LLMMLPNode::Config config; OutputVector new_args; diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/qkv_proj_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/qkv_proj_fusion.cpp index 91aadfc97ec3c2..86fb3587ff4eee 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/qkv_proj_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/qkv_proj_fusion.cpp @@ -88,23 +88,27 @@ ov::intel_cpu::QKVProjFusion::QKVProjFusion() { if (is_quantized_int8) { auto deq_mul = ov::as_type_ptr(mm_input1); - if (!deq_mul) + if (!deq_mul) { return false; + } auto deq_mul_in0 = deq_mul->input_value(0).get_node_shared_ptr(); auto deq_mul_in1 = deq_mul->input_value(1).get_node_shared_ptr(); auto cvt = ov::as_type_ptr(deq_mul_in0); - if (!cvt) + if (!cvt) { return false; + } constw = ov::as_type_ptr(cvt->input_value(0).get_node_shared_ptr()); - if (!constw || constw->get_element_type() != ov::element::i8) + if (!constw || constw->get_element_type() != ov::element::i8) { return false; + } deq_scale = ov::as_type_ptr(deq_mul_in1); - if (!deq_scale || deq_scale->get_element_type() != ov::element::f32) + if (!deq_scale || deq_scale->get_element_type() != ov::element::f32) { return false; + } } else { constw = ov::as_type_ptr(mm_input1); if (!constw) { @@ -114,8 +118,9 @@ ov::intel_cpu::QKVProjFusion::QKVProjFusion() { return false; } } - if (!constw) + if (!constw) { return false; + } } // input feature size should be the same @@ -141,8 +146,9 @@ ov::intel_cpu::QKVProjFusion::QKVProjFusion() { } // append dequantize scales at the end if (is_quantized_int8) { - for (auto& d : deq_scales) + for (auto& d : deq_scales) { args.push_back(d); + } } QKVProjectionNode::Config 
config; @@ -210,17 +216,21 @@ ov::intel_cpu::QKVProjFusion2::QKVProjFusion2() { auto node_split_lengths = ov::as_type_ptr(pattern_map.at(qkv_split_lengths).get_node_shared_ptr()); - if (!node_split_lengths) + if (!node_split_lengths) { return false; + } auto split_lengths = node_split_lengths->get_vector(); - if (split_lengths.size() != 3) + if (split_lengths.size() != 3) { return false; + } auto proj_size = split_lengths[0]; - if (split_lengths[1] != proj_size) + if (split_lengths[1] != proj_size) { return false; - if (split_lengths[2] != proj_size) + } + if (split_lengths[2] != proj_size) { return false; + } bool is_quantized_int8 = pattern_map.count(qkv_proj_weight_const_i8); @@ -232,12 +242,14 @@ ov::intel_cpu::QKVProjFusion2::QKVProjFusion2() { qkv_proj_weight_node = ov::as_type_ptr(pattern_map.at(qkv_proj_weight_const).get_node_shared_ptr()); } - if (!qkv_proj_weight_node) + if (!qkv_proj_weight_node) { return false; + } auto w_shape = qkv_proj_weight_node->get_shape(); - if (w_shape[0] != static_cast(proj_size * 3)) + if (w_shape[0] != static_cast(proj_size * 3)) { return false; + } QKVProjectionNode::Config config; config.quantized = is_quantized_int8; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp index c567e7c38c2ef1..54e2554bf5348d 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp @@ -20,8 +20,9 @@ static const int DEFAULT_AXIS = 1; NodeFusingType GetNodeFusingType(const std::shared_ptr& node) { auto& rt = node->get_rt_info(); const auto rinfo = rt.find("MayBeFusedInPlugin"); - if (rinfo == rt.end()) + if (rinfo == rt.end()) { return NodeFusingType::NotSet; + } return rinfo->second.as(); } void SetNodeFusingType(const std::shared_ptr& node, NodeFusingType 
nodeType) { @@ -46,8 +47,9 @@ int getNumNonConstInputs(const std::shared_ptr& node) { if (ov::is_type(parent)) { for (const auto& grandparent_out : parent->input_values()) { const auto grandparent = grandparent_out.get_node_shared_ptr(); - if (!ov::is_type(grandparent)) + if (!ov::is_type(grandparent)) { num_non_const_inputs++; + } } } else if (!ov::is_type(parent)) { num_non_const_inputs++; @@ -56,8 +58,9 @@ int getNumNonConstInputs(const std::shared_ptr& node) { return num_non_const_inputs; } bool isFullyConnected(const std::shared_ptr& node) { - if (!ov::is_type(node)) + if (!ov::is_type(node)) { return false; + } const auto out_activations = node->input_value(0); const auto out_weights = node->input_value(1); const auto rank_a = out_activations.get_partial_shape().rank(); @@ -119,8 +122,9 @@ bool isSuitableChildForFusingSimple(const std::shared_ptr& node) { return SupportsFusingWithConvolution_Simple(node) && getNumNonConstInputs(node) == 1; } bool isSuitableChildForFusingBias(const std::shared_ptr& node, int fusingAxis) { - if (!ov::is_type(node)) + if (!ov::is_type(node)) { return false; + } auto is_suitable_parent = [](const std::shared_ptr& node) { return (ov::is_type(node) || ov::is_type(node) || @@ -132,25 +136,31 @@ bool isSuitableChildForFusingBias(const std::shared_ptr& node, int f const auto& parent = parent_out.get_node_shared_ptr(); const auto& parent_pshape = parent_out.get_partial_shape(); if (is_suitable_parent(parent) && parent_pshape.rank().is_static()) { - if (parent->get_output_target_inputs(0).size() > 1) + if (parent->get_output_target_inputs(0).size() > 1) { break; + } const auto bias_port = 1 - in.get_index(); const auto bias_out = node->input_value(bias_port); - if ((bias_out.get_target_inputs().size() > 1) || !ov::op::util::is_on_constant_path(bias_out)) + if ((bias_out.get_target_inputs().size() > 1) || !ov::op::util::is_on_constant_path(bias_out)) { break; + } const auto& bias_pshape = bias_out.get_partial_shape(); - if 
(bias_pshape.is_dynamic()) + if (bias_pshape.is_dynamic()) { break; + } const auto bias_shape_norm = getNormalizedDimsBySize(bias_pshape.get_shape(), parent_pshape.size()); if (fusingAxis >= static_cast(bias_shape_norm.size()) || fusingAxis >= static_cast(parent_pshape.size()) || - bias_shape_norm.size() != parent_pshape.size() || bias_shape_norm.size() < 2) + bias_shape_norm.size() != parent_pshape.size() || bias_shape_norm.size() < 2) { break; - if (parent_pshape[fusingAxis].is_dynamic()) + } + if (parent_pshape[fusingAxis].is_dynamic()) { break; + } if ((bias_shape_norm[fusingAxis] == static_cast(parent_pshape[fusingAxis].get_length())) && - (bias_shape_norm[fusingAxis] == shape_size(bias_shape_norm))) + (bias_shape_norm[fusingAxis] == shape_size(bias_shape_norm))) { return true; + } } } return false; @@ -187,21 +197,24 @@ void MarkSubgraphOpAsSkipped(const std::shared_ptr& node) { } bool isSuitableConvert(const std::shared_ptr& node) { - if (!ov::is_type(node)) + if (!ov::is_type(node)) { return false; + } auto isSuitableParent = [](const std::shared_ptr& node) { for (const auto& input : node->inputs()) { const auto parent = input.get_source_output().get_node_shared_ptr(); - if (!ov::is_type(parent)) + if (!ov::is_type(parent)) { return false; + } } return true; }; auto isSuitableChild = [](const std::shared_ptr& node) { for (const auto& out : node->outputs()) { const auto& child = out.get_node_shared_ptr(); - if (!ov::is_type(child)) + if (!ov::is_type(child)) { return false; + } } return true; }; @@ -225,8 +238,9 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr& m) { RUN_ON_MODEL_SCOPE(SnippetsMarkSkipped); int channelAxis = DEFAULT_AXIS; for (auto& node : m->get_ordered_ops()) { - if (is_skipped_op(node)) + if (is_skipped_op(node)) { continue; + } // We perform this check separately because we mark here only weights path // Matmul itself will be checked further if (isSuitableMatMulWithConstantPath(node)) { diff --git 
a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp index a3c9a1c184d550..c765912e3e31e0 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp @@ -15,8 +15,9 @@ using ShapeInferPtr = IShapeInferSnippetsFactory::ShapeInferPtr; ShapeInferPtr CPUShapeInferSnippetsFactory::get_specific_op_shape_infer(const ov::DiscreteTypeInfo& key, const std::shared_ptr& op) const { const auto& maker_iter = specific_ops_registry.find(key); - if (maker_iter != specific_ops_registry.end()) + if (maker_iter != specific_ops_registry.end()) { return maker_iter->second(op); + } return {}; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp index f360437d59da6b..cfa9018e1b9801 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/brgemm_utils.cpp @@ -35,10 +35,10 @@ cpu_isa_t get_primitive_isa(const ov::element::Type& dt_in0, bool is_with_amx) { // Note: AMX might be not used even if it's supported by the hardware, check the BrgemmToBrgemmCPU pass for details if (is_with_amx) { - if (dt_in0 == ov::element::f16) + if (dt_in0 == ov::element::f16) { SUPPORT_ONE(avx512_core_amx_fp16, "Unsupported hardware configuration: amx is supported only on avx512 platforms") - else + } else SUPPORT_ONE(avx512_core_amx, "Unsupported hardware configuration: amx is supported only on avx512 platforms") } else if (dt_in0 == ov::element::bf16) { @@ -60,8 +60,9 @@ cpu_isa_t get_primitive_isa(const ov::element::Type& dt_in0, bool is_with_amx) { } BRGEMM_TYPE get_brgemm_type(const ov::element::Type& element_type_a, bool transpose_b) { - if (element_type_a == element::f32) + if 
(element_type_a == element::f32) { return transpose_b ? BRGEMM_TYPE::REPACKING_ONLY : BRGEMM_TYPE::STAND_ALONE; + } OPENVINO_ASSERT(element_type_a != element::bf16 || mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16), "BrgemmCPU BF16 precision is not supported on non avx512_core_bf16 system"); @@ -69,18 +70,23 @@ BRGEMM_TYPE get_brgemm_type(const ov::element::Type& element_type_a, bool transp "BrgemmCPU FP16 precision is not supported on non avx512_core_amx_fp16 system"); if (one_of(element_type_a, element::u8, element::i8, element::bf16) && - dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) { return BRGEMM_TYPE::WITH_AMX; - if (element_type_a == ov::element::f16 && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16)) + } + if (element_type_a == ov::element::f16 && + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16)) { return BRGEMM_TYPE::WITH_AMX; + } // Note: this condition reproduces logic from the OneDNN Brgemm implementation. This is needed to align with the // backend requirements. More details in onednn/src/cpu/x64/brgemm/brgemm_utils.cpp - if (element_type_a == ov::element::i8) + if (element_type_a == ov::element::i8) { return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2) ? 
BRGEMM_TYPE::REPACKING_ONLY : BRGEMM_TYPE::WITH_COMPENSATIONS; + } - if (one_of(element_type_a, element::u8, ov::element::bf16)) + if (one_of(element_type_a, element::u8, ov::element::bf16)) { return BRGEMM_TYPE::REPACKING_ONLY; + } OV_CPU_JIT_EMITTER_THROW("Failed to determine brgemm mode"); } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 48456b8220300a..b52f2628816279 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -28,7 +28,7 @@ namespace { template void set_full_port_desc(const T& port) { const auto& shape_rank = port.get_partial_shape().size(); - static const std::vector full_dim_subtensor(std::min(shape_rank, size_t(2)), + static const std::vector full_dim_subtensor(std::min(shape_rank, static_cast(2)), ov::snippets::utils::get_full_dim_value()); PortDescriptorUtils::set_port_descriptor(port, full_dim_subtensor); } @@ -47,8 +47,9 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { const auto node = m.get_match_root(); const auto brgemm = ov::as_type_ptr(node); const auto brgemm_plugin = ov::as_type_ptr(node); - if (!brgemm || brgemm_plugin) + if (!brgemm || brgemm_plugin) { OPENVINO_THROW("BrgemmCPU cannot be in body before BrgemmToBrgemmCPU pass"); + } const auto& brgemm_in0_desc = PortDescriptorUtils::get_port_descriptor_ptr(brgemm->input(0)); const auto& brgemm_in1_desc = PortDescriptorUtils::get_port_descriptor_ptr(brgemm->input(1)); @@ -93,8 +94,9 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { PortDescriptorUtils::set_port_descriptor(brgemm_repacking->input(0), brgemm_in1_desc->get_subtensor(), layout_b); - for (const auto& output : brgemm_repacking->outputs()) + for (const auto& output : brgemm_repacking->outputs()) { set_full_port_desc(output); + } if 
(with_amx(brgemm_type)) { const auto scratch = std::make_shared(ov::Shape{BrgemmCPU::SCRATCH_BYTE_SIZE}); @@ -152,8 +154,9 @@ pass::BrgemmToBrgemmCPU::BrgemmToBrgemmCPU() { // need to run validate_and_infer_types manually: either input shapes were updated or // output Layout was updated (out shape will be updated in validate_and_infer_types()) - if (brgemm_repacking) + if (brgemm_repacking) { brgemm_repacking->validate_and_infer_types(); + } brgemm_cpu->validate_and_infer_types(); return true; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/eliminate_brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/eliminate_brgemm_copy_b.cpp index b87a78c6b0cb40..65107147c87471 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/eliminate_brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/eliminate_brgemm_copy_b.cpp @@ -38,8 +38,9 @@ pass::EliminateBrgemmCopyB::EliminateBrgemmCopyB() { // TODO [157340]: support external repacking for copyB with compensations if (!is_supported_layout(layout) || brgemm_utils::with_compensations(copy_b_node->get_type()) || - transformation_callback(copy_b_node)) + transformation_callback(copy_b_node)) { return false; + } // If there is non-planar layout, we should insert reshape to support shape inference if (!ov::snippets::utils::is_planar_layout(layout)) { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/enforce_precision.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/enforce_precision.cpp index 46c48425157ef9..ad3685b5731a72 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/enforce_precision.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/enforce_precision.cpp @@ -123,10 +123,12 @@ std::set> EnforcePrecision::get_supported_precisi const std::shared_ptr& op) noexcept { std::set> types; if (ov::is_type(op)) { - if 
(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16)) + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16)) { types.insert({element::f16, element::f16}); - if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) + } + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) { types.insert({element::bf16, element::bf16}); + } } return types; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/adjust_brgemm_copy_b_loop_ports.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/adjust_brgemm_copy_b_loop_ports.cpp index 1e7abd6110f45e..f283d05881d24f 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/adjust_brgemm_copy_b_loop_ports.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/adjust_brgemm_copy_b_loop_ports.cpp @@ -74,8 +74,9 @@ bool pass::AdjustBrgemmCopyBLoopPorts::run(const snippets::lowered::LinearIR& li const auto& shape_infer_seq = ov::snippets::utils::get_first_parent_shape_infer_expr_seq(brgemm_in1.get_expr()); const auto source = shape_infer_seq.empty() ? 
brgemm_in1 : shape_infer_seq.back()->get_input_port_connector(0)->get_source(); - if (is_type(source.get_expr()->get_node())) + if (is_type(source.get_expr()->get_node())) { return std::vector{}; + } const auto repacking_expr = brgemm_utils::repacking::get_copy_b_expr(brgemm_expr); OPENVINO_ASSERT(repacking_expr, "BrgemmCopyB expression is not found"); return repacking_expr->get_loop_ids(); @@ -83,21 +84,24 @@ bool pass::AdjustBrgemmCopyBLoopPorts::run(const snippets::lowered::LinearIR& li for (const auto& expr : linear_ir) { const auto brgemm = ov::as_type_ptr(expr->get_node()); - if (!brgemm || !brgemm_utils::with_repacking(brgemm->get_type())) + if (!brgemm || !brgemm_utils::with_repacking(brgemm->get_type())) { continue; + } const auto& brgemm_loop_ids = expr->get_loop_ids(); const auto& repacking_loop_ids = get_repacking_loop_idces(expr); // Continue if there is no blocking loop - if (brgemm_loop_ids.empty() && repacking_loop_ids.empty()) + if (brgemm_loop_ids.empty() && repacking_loop_ids.empty()) { continue; + } OPENVINO_ASSERT(brgemm_loop_ids.size() > repacking_loop_ids.size(), "Invalid BrgemmCopyB loop configuration"); const auto& loop_manager = linear_ir.get_loop_manager(); for (auto i = repacking_loop_ids.size(); i < brgemm_loop_ids.size(); i++) { const auto& loop = loop_manager->get_loop_info(brgemm_loop_ids[i]); auto uni_loop = ov::as_type_ptr(loop); - if (!uni_loop) + if (!uni_loop) { uni_loop = ov::as_type_ptr(loop)->get_unified_loop_info(); + } if (!m_affected_loops.count(uni_loop) && update_loop_info(uni_loop)) { m_affected_loops.insert(uni_loop); modified = true; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_copy_b_loop_ports_adjuster.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_copy_b_loop_ports_adjuster.cpp index bdf811bad90c9d..33263cfccce582 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_copy_b_loop_ports_adjuster.cpp +++ 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_copy_b_loop_ports_adjuster.cpp @@ -14,8 +14,9 @@ namespace intel_cpu { BrgemmCopyBLoopPortsAdjuster::BrgemmCopyBLoopPortsAdjuster(const ov::snippets::lowered::LinearIRCPtr& linear_ir, const CPURuntimeConfigurator* configurator) : ov::snippets::lowered::pass::RuntimeOptimizer(configurator) { - if (!linear_ir->is_dynamic()) + if (!linear_ir->is_dynamic()) { return; + } const auto& pass = std::make_shared(); pass->run(*linear_ir); @@ -24,8 +25,9 @@ BrgemmCopyBLoopPortsAdjuster::BrgemmCopyBLoopPortsAdjuster(const ov::snippets::l for (const auto& p : loop_map) { if (const auto& exp_loop = ov::as_type_ptr(p.second)) { const auto& uni_loop = exp_loop->get_unified_loop_info(); - if (affected_uni_loops.count(uni_loop)) + if (affected_uni_loops.count(uni_loop)) { m_affected_uni2exp_map[uni_loop].push_back(exp_loop); + } } } } @@ -38,8 +40,9 @@ bool BrgemmCopyBLoopPortsAdjuster::run(const snippets::lowered::LinearIR& linear snippets::RuntimeConfigurator::LoopInfoRuntimeParamsMap initialized_info; if (intel_cpu::pass::AdjustBrgemmCopyBLoopPorts::update_loop_info(uni_loop)) { initialized_info[uni_loop] = snippets::RuntimeConfigurator::get_loop_runtime_params(uni_loop); - for (const auto& exp_loop : exp_loops) + for (const auto& exp_loop : exp_loops) { snippets::RuntimeConfigurator::update_expanded_loop_info(exp_loop, initialized_info); + } } } return true; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp index 9a7dd2dbe727c7..017c7e75d35e42 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp @@ -87,8 +87,9 @@ bool BrgemmCPUBlocking::mark_blocking_loops(LinearIR& linear_ir, n_block, k_block); - if 
(stand_alone(type)) + if (stand_alone(type)) { return res; + } const auto copy_b_expr = repacking::get_copy_b_expr(brgemm_expr); if (copy_b_expr) { @@ -121,14 +122,17 @@ bool BrgemmCPUBlocking::mark_blocking_loops(LinearIR& linear_ir, OPENVINO_ASSERT(in_ports.size() > 1, "Invalid number of input loop ports"); loop_info->replace_with_new_ports(in_ports[1], {in_ports[1], new_port}); }; - if (!is_full_dim_value(m_block)) + if (!is_full_dim_value(m_block)) { update_loop_info(LoopPort::create(compens_port)); + } - if (!is_full_dim_value(n_block)) + if (!is_full_dim_value(n_block)) { update_loop_info(LoopPort::create(compens_port, 0)); + } - if (!is_full_dim_value(k_block)) + if (!is_full_dim_value(k_block)) { update_loop_info(LoopPort::create(compens_port, 1)); + } } return true; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp index c4db9feb0c639f..ff05287b2c0cf4 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.cpp @@ -16,19 +16,22 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_load_convert( const auto& convert_expr = *convert_it; const auto& convert = ov::as_type_ptr(convert_expr->get_node()); const auto& input_connector = convert_expr->get_input_port_connector(0); - if (convert->get_destination_type() != ov::element::f32 && convert->get_destination_type() != ov::element::i32) + if (convert->get_destination_type() != ov::element::f32 && convert->get_destination_type() != ov::element::i32) { return false; + } const auto& load_output = input_connector->get_source(); const auto& load_expr = load_output.get_expr(); const auto load = ov::as_type_ptr(load_expr->get_node()); if (!load || ov::is_type(load_expr->get_node()) || - 
ov::is_type(load_expr->get_node())) + ov::is_type(load_expr->get_node())) { return false; + } const auto consumers = input_connector->get_consumers(); - if (consumers.size() != 1) + if (consumers.size() != 1) { return false; + } OPENVINO_ASSERT(convert_expr->get_loop_ids() == load_expr->get_loop_ids(), "The pair of Load and Convert expressions must be in the same loops!"); @@ -63,18 +66,21 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::fuse_store_convert( const auto& convert = ov::as_type_ptr(convert_expr->get_node()); const auto& output_connector = convert_expr->get_output_port_connector(0); if (convert->get_input_element_type(0) != ov::element::f32 && - convert->get_input_element_type(0) != ov::element::i32) + convert->get_input_element_type(0) != ov::element::i32) { return false; + } const auto consumers = output_connector->get_consumers(); - if (consumers.size() != 1) + if (consumers.size() != 1) { return false; + } const auto store_input = *(consumers.begin()); const auto& store_expr = store_input.get_expr(); const auto store = ov::as_type_ptr(store_expr->get_node()); - if (!store) + if (!store) { return false; + } OPENVINO_ASSERT(convert_expr->get_loop_ids() == store_expr->get_loop_ids(), "The pair of Convert and Store expressions must be in the same loops!"); @@ -112,8 +118,9 @@ bool ov::intel_cpu::pass::FuseLoadStoreConvert::run(snippets::lowered::LinearIR& for (auto expr_it = begin; expr_it != end; expr_it++) { const auto& expr = *expr_it; const auto& convert = expr->get_node(); - if (!ov::is_type(convert)) + if (!ov::is_type(convert)) { continue; + } if (fuse_load_convert(linear_ir, expr_it)) { modified = true; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.cpp index 4ae8be0bb5612c..e32771c7817b08 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.cpp +++ 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.cpp @@ -21,8 +21,9 @@ static const int DEFAULT_AXIS = 1; NodeFusingType GetNodeFusingType(const std::shared_ptr& node) { auto& rt = node->get_rt_info(); const auto rinfo = rt.find("MayBeFusedInPlugin"); - if (rinfo == rt.end()) + if (rinfo == rt.end()) { return NodeFusingType::NotSet; + } return rinfo->second.as(); } void SetNodeFusingType(const std::shared_ptr& node, NodeFusingType nodeType) { @@ -47,8 +48,9 @@ int getNumNonConstInputs(const std::shared_ptr& node) { if (ov::is_type(parent)) { for (const auto& grandparent_out : parent->input_values()) { const auto grandparent = grandparent_out.get_node_shared_ptr(); - if (!ov::is_type(grandparent)) + if (!ov::is_type(grandparent)) { num_non_const_inputs++; + } } } else if (!ov::is_type(parent)) { num_non_const_inputs++; @@ -57,8 +59,9 @@ int getNumNonConstInputs(const std::shared_ptr& node) { return num_non_const_inputs; } bool isFullyConnected(const std::shared_ptr& node) { - if (!ov::is_type(node)) + if (!ov::is_type(node)) { return false; + } const auto out_activations = node->input_value(0); const auto out_weights = node->input_value(1); const auto rank_a = out_activations.get_partial_shape().rank(); @@ -87,27 +90,31 @@ bool canBePerformedAsScaleShift(const std::shared_ptr& node, const i fusingPort = i; dataShape = node->get_input_partial_shape(i); // only one non-const parent is allowed - if (++numNonConstInputs != 1) + if (++numNonConstInputs != 1) { return false; + } } else { // every const parent must have exactly one child const auto out = parent->outputs(); const bool has_only_child = (out.size() == 1) && (out[0].get_target_inputs().size() == 1); - if (!has_only_child) + if (!has_only_child) { return false; + } } } const auto isBroadcastableToDataInput = [&]() { for (size_t i = 0; i < node->get_input_size(); i++) { - if (i == fusingPort) + if (i == fusingPort) { continue; + } const ov::PartialShape weightShape = 
node->get_input_partial_shape(i); if (!isPerTensorOrPerChannelBroadcastable(dataShape.get_max_shape(), weightShape.get_max_shape(), channelAxis, - true)) + true)) { return false; + } } return true; }; @@ -193,34 +200,41 @@ bool isSuitableSubtractAsZeroPointsParent(const std::shared_ptr& nod const auto out = node->outputs(); const bool has_only_child = (out.size() == 1) && (out[0].get_target_inputs().size() == 1); const bool has_two_parents = node->get_input_size() == 2; - if (!(is_suitable_node && has_only_child && has_two_parents)) + if (!(is_suitable_node && has_only_child && has_two_parents)) { return false; + } const auto child = node->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); const bool is_conv = ov::is_type(child); const bool is_group_conv = ov::is_type(child); - if (!is_conv && !is_group_conv) + if (!is_conv && !is_group_conv) { return false; + } const auto weight_pshape = child->get_input_partial_shape(1); - if (weight_pshape.is_dynamic()) + if (weight_pshape.is_dynamic()) { return false; + } const auto weight_shape = weight_pshape.get_shape(); const bool is_depthwise = is_group_conv && weight_shape[1] == 1 && weight_shape[2] == 1; const auto depthwise_rank = child->get_input_partial_shape(0).rank(); - if (depthwise_rank.is_dynamic()) + if (depthwise_rank.is_dynamic()) { return false; + } const bool deptwise_is_suitable = implication(is_depthwise, depthwise_rank.get_length() < 5); - if (!deptwise_is_suitable) + if (!deptwise_is_suitable) { return false; + } const auto zp_weights = node->get_input_node_shared_ptr(1); const auto zp_weight_pshape = zp_weights->get_output_partial_shape(0); - if (zp_weight_pshape.is_dynamic()) + if (zp_weight_pshape.is_dynamic()) { return false; + } const auto zp_weight_shape = zp_weight_pshape.get_shape(); auto correct_shape = ov::Shape(zp_weight_shape.size(), 1); - if (zp_weight_shape.size() > 1) + if (zp_weight_shape.size() > 1) { correct_shape[1] = zp_weight_shape[1]; + } const bool 
zp_weights_is_suitable = ov::is_type(zp_weights) && zp_weights->get_element_type() == ov::element::u8 && zp_weight_shape.size() >= 2 && correct_shape == zp_weight_shape; @@ -257,18 +271,22 @@ bool isSuitableChildForFusingMatMul(const std::shared_ptr& node, const auto& parent = parent_out.get_node_shared_ptr(); const auto& parent_pshape = parent_out.get_partial_shape(); if (ov::is_type(parent) && parent_pshape.rank().is_static()) { - if (parent->get_output_target_inputs(0).size() > 1) + if (parent->get_output_target_inputs(0).size() > 1) { break; + } const auto bias_port = 1 - in.get_index(); const auto bias_out = node->input_value(bias_port); - if ((bias_out.get_target_inputs().size() > 1) || !ov::op::util::is_on_constant_path(bias_out)) + if ((bias_out.get_target_inputs().size() > 1) || !ov::op::util::is_on_constant_path(bias_out)) { break; + } const auto& bias_pshape = bias_out.get_partial_shape(); - if (bias_pshape.is_dynamic()) + if (bias_pshape.is_dynamic()) { break; + } auto getNormalizedPShape = [](const ov::PartialShape& dims, size_t ndims) -> ov::PartialShape { - if (dims.size() >= ndims) + if (dims.size() >= ndims) { return dims; + } ov::PartialShape pshape(std::vector(ndims, 1)); std::copy(dims.rbegin(), dims.rend(), pshape.rbegin()); return pshape; @@ -276,12 +294,14 @@ bool isSuitableChildForFusingMatMul(const std::shared_ptr& node, const auto bias_pshape_norm = getNormalizedPShape(bias_pshape, parent_pshape.size()); if (fusingAxis >= static_cast(bias_pshape_norm.size()) || fusingAxis >= static_cast(parent_pshape.size()) || - bias_pshape_norm.size() != parent_pshape.size() || bias_pshape_norm.size() < 2) + bias_pshape_norm.size() != parent_pshape.size() || bias_pshape_norm.size() < 2) { break; + } if (((bias_pshape_norm[fusingAxis] == parent_pshape[fusingAxis]) || (is_dq_scales && bias_pshape_norm[fusingAxis] == 1)) && - (bias_pshape_norm[fusingAxis] == static_cast(shape_size(bias_pshape_norm.get_shape())))) + (bias_pshape_norm[fusingAxis] == 
static_cast(shape_size(bias_pshape_norm.get_shape())))) { return true; + } } } } @@ -311,8 +331,10 @@ bool isSuitableChildForFusingMatMul(const std::shared_ptr& node, } if (ov::is_type(node)) { - if (one_of(node->get_output_element_type(0), ov::element::i8, ov::element::u8) && !canMatMulBeExecutedInI8) + if (one_of(node->get_output_element_type(0), ov::element::i8, ov::element::u8) && + !canMatMulBeExecutedInI8) { return false; + } } } @@ -328,13 +350,15 @@ bool isSuitableChildForFusingMatMul(const std::shared_ptr& node, NodeFusingType::FusedWithMatMul, NodeFusingType::FusedWithMatMulI8, NodeFusingType::FusedWithFC, - NodeFusingType::FusedWithFCI8)) + NodeFusingType::FusedWithFCI8)) { num_mm_inputs++; - else if (!ov::op::util::is_on_constant_path(parent_out)) + } else if (!ov::op::util::is_on_constant_path(parent_out)) { num_non_const_inputs++; + } } - if (num_non_const_inputs + num_mm_inputs != 1) + if (num_non_const_inputs + num_mm_inputs != 1) { return false; + } updatedChainType = NodeFusingType::FusedWithMisc; return true; @@ -343,20 +367,24 @@ bool isSuitableChildForFusingMatMul(const std::shared_ptr& node, return false; } bool isSuitableParentForFusingSumActivation(const std::shared_ptr& node) { - if (!ov::is_type(node)) + if (!ov::is_type(node)) { return false; + } auto isFusedBiasNode = [](const std::shared_ptr& n) { - if (!(ov::is_type(n) && GetNodeFusingType(n) == NodeFusingType::FusedWithConvolution)) + if (!(ov::is_type(n) && GetNodeFusingType(n) == NodeFusingType::FusedWithConvolution)) { return false; + } const auto conv = n->get_input_source_output(0); const auto bias = n->get_input_source_output(1); if (!(ov::is_type(bias.get_node_shared_ptr()) && - isSuitableConvolutionParent(conv.get_node_shared_ptr()))) + isSuitableConvolutionParent(conv.get_node_shared_ptr()))) { return false; + } const auto& conv_shape = conv.get_partial_shape(); const auto& bias_shape = bias.get_partial_shape(); - if (conv_shape.rank().is_dynamic()) + if 
(conv_shape.rank().is_dynamic()) { return false; + } auto getNormalizedDims = [](const ov::Shape& dims, size_t ndims) -> ov::Shape { ov::Shape normalizedDims = dims; for (size_t i = 0; i < (ndims - dims.size()); i++) { @@ -365,16 +393,19 @@ bool isSuitableParentForFusingSumActivation(const std::shared_ptr& n return normalizedDims; }; const auto bias_norm_dims = getNormalizedDims(bias_shape.get_shape(), conv_shape.size()); - if (conv_shape.size() != bias_norm_dims.size() || bias_norm_dims.size() < 2) + if (conv_shape.size() != bias_norm_dims.size() || bias_norm_dims.size() < 2) { return false; + } const auto channelAxis = 1; return conv_shape[channelAxis].is_static() && conv_shape[channelAxis].get_length() == static_cast(bias_norm_dims[channelAxis]) && bias_norm_dims[channelAxis] == static_cast(shape_size(bias_norm_dims)); }; auto isFusedFQNode = [&isFusedBiasNode](const std::shared_ptr& n) { - if (!(ov::is_type(n) && GetNodeFusingType(n) == NodeFusingType::FusedWithConvolution)) + if (!(ov::is_type(n) && + GetNodeFusingType(n) == NodeFusingType::FusedWithConvolution)) { return false; + } const auto& parent = n->get_input_node_shared_ptr(0); const bool is_suitable_parent = isSuitableConvolutionParent(parent) || isFusedBiasNode(parent) || (GetNodeFusingType(parent) == NodeFusingType::FusedWithConvolution); @@ -432,14 +463,16 @@ void MarkSubgraphOpAsSkipped(const std::shared_ptr& node) { } bool isSuitableConvert(const std::shared_ptr& node) { - if (!ov::is_type(node)) + if (!ov::is_type(node)) { return false; + } auto hasResult = [](const std::shared_ptr& node) { auto consumers = node->output(0).get_target_inputs(); bool findResult = false; if (consumers.size() == 1) { - if (ov::is_type(consumers.begin()->get_node())) + if (ov::is_type(consumers.begin()->get_node())) { findResult = true; + } } return findResult; }; @@ -467,8 +500,9 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr& m) { RUN_ON_MODEL_SCOPE(SnippetsMarkSkipped); int channelAxis = 
DEFAULT_AXIS; for (auto& node : m->get_ordered_ops()) { - if (is_skipped_op(node)) + if (is_skipped_op(node)) { continue; + } // We perform this check separately because we mark here only weights path // Matmul itself will be checked further if (isSuitableMatMulWithConstantPath(node)) { @@ -520,8 +554,9 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr& m) { } else { for (const auto fusingChainType : getContinuableChains(node)) { if (fusingChainType == NodeFusingType::FusedWithReduce) { - if (isSuitableReduceChild(node, channelAxis)) + if (isSuitableReduceChild(node, channelAxis)) { PropagateIfHasOnlyChild(node, fusingChainType); + } } else if (isSuitableChildForFusingSimple(node, channelAxis)) { PropagateIfHasOnlyChild(node, fusingChainType); } else if (fusingChainType == NodeFusingType::FusedWithConvolution || @@ -546,8 +581,9 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr& m) { one_of(fusingChainType, NodeFusingType::FusedWithMatMulI8, NodeFusingType::FusedWithFCI8); // Handle fusings for both MatMul and FullyConnected NodeFusingType updatedChainType = fusingChainType; - if (isSuitableChildForFusingMatMul(node, isExecutedInINT8, updatedChainType, channelAxis)) + if (isSuitableChildForFusingMatMul(node, isExecutedInINT8, updatedChainType, channelAxis)) { PropagateIfHasOnlyChild(node, updatedChainType); + } } } } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp index 50a2399e93ecc4..5f6515b25759ad 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp @@ -27,8 +27,9 @@ using ShapeInferPtr = IShapeInferSnippetsFactory::ShapeInferPtr; ShapeInferPtr CPUShapeInferSnippetsFactory::get_specific_op_shape_infer(const ov::DiscreteTypeInfo& key, const std::shared_ptr& op) const { const auto& maker_iter = 
specific_ops_registry.find(key); - if (maker_iter != specific_ops_registry.end()) + if (maker_iter != specific_ops_registry.end()) { return maker_iter->second(op); + } return {}; } diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp index 44aaf251bc201f..2a1c125e430670 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp @@ -72,20 +72,22 @@ libxsmm_bitfield BinaryEltwiseTPP::get_broadcasting_flags(const snippets::Vector } else { libxsmm_bitfield flags = LIBXSMM_MELTW_FLAG_BINARY_NONE; if (subshape_0[0] != subshape_1[0]) { - if (subshape_0[0] == 1) + if (subshape_0[0] == 1) { flags |= LIBXSMM_MELTW_FLAG_BINARY_BCAST_COL_IN_0; - else if (subshape_1[0] == 1) + } else if (subshape_1[0] == 1) { flags |= LIBXSMM_MELTW_FLAG_BINARY_BCAST_COL_IN_1; - else + } else { OPENVINO_THROW("Unsupported subshape combination: dim 0"); + } } if (subshape_0[1] != subshape_1[1]) { - if (subshape_0[1] == 1) + if (subshape_0[1] == 1) { flags |= LIBXSMM_MELTW_FLAG_BINARY_BCAST_ROW_IN_0; - else if (subshape_1[1] == 1) + } else if (subshape_1[1] == 1) { flags |= LIBXSMM_MELTW_FLAG_BINARY_BCAST_ROW_IN_1; - else + } else { OPENVINO_THROW("Unsupported subshape combination: dim 1"); + } } return flags; } diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp index 04306ca8f8b6c5..af4e15dee1fa70 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp @@ -15,8 +15,9 @@ EquationTPP::EquationTPP(const OutputVector& arguments, std::vector o m_op_descs(std::move(op_descs)) { // Initialize input/output ports as memory access ports std::set ma_iport_idx; - for (size_t i = 0; i < get_input_size(); i++) + for (size_t i = 0; i < 
get_input_size(); i++) { ma_iport_idx.insert(ma_iport_idx.end(), i); + } ctor_initialize(ma_iport_idx, std::set{0}); constructor_validate_and_infer_types(); } diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp index e0e890a347a026..9457044de8fb76 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp @@ -27,12 +27,13 @@ struct CustomPowerStaticBuilder : public NodeFactory::TPPCustomBuilder { const auto power = power_static->get_power(); const auto& input = n->input_value(0); std::shared_ptr tpp_node{nullptr}; - if (power == -1.f) + if (power == -1.f) { tpp_node = std::make_shared(input); - else if (power == 2.f) + } else if (power == 2.f) { tpp_node = std::make_shared(input); - else if (power == 0.5f) + } else if (power == 0.5f) { tpp_node = std::make_shared(input); + } OPENVINO_ASSERT(tpp_node, "Failed to create TPP in power_static_builder"); return tpp_node; }; diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp index 571e292104d132..baa12967206839 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp @@ -44,8 +44,9 @@ BrgemmToBrgemmTPP::BrgemmToBrgemmTPP() { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::BrgemmToBrgemmTPP") const auto node = m.get_match_root(); const auto brgemm = ov::as_type_ptr(node); - if (!brgemm || ov::as_type_ptr(node)) + if (!brgemm || ov::as_type_ptr(node)) { OPENVINO_THROW("BrgemmCPU cannot be in body before BrgemmToBrgemmTPP pass"); + } if (brgemm->is_dynamic()) { return false; @@ -63,8 +64,10 @@ BrgemmToBrgemmTPP::BrgemmToBrgemmTPP() { const auto& precision_b = 
brgemm->get_input_element_type(1); const auto& precision_c = brgemm->get_output_element_type(0); - if (!is_supported_brgemm_configuration({layout_a, layout_b, layout_c}, {precision_a, precision_b, precision_c})) + if (!is_supported_brgemm_configuration({layout_a, layout_b, layout_c}, + {precision_a, precision_b, precision_c})) { return false; + } const auto dimsMatMulIn0 = snippets::utils::get_planar_pshape(brgemm->input(0)).get_shape(); const auto dimsMatMulIn1 = snippets::utils::get_planar_pshape(brgemm->input(1)).get_shape(); diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp index 63dd44ca133fa0..e63d28a9530bb6 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp @@ -42,8 +42,9 @@ EltwiseToEltwiseTPP::EltwiseToEltwiseTPP() { const size_t N_block = ov::is_type(node) ? 
ov::snippets::utils::get_full_dim_value() : 64; ov::replace_node_update_name(node, tpp_eltwise); - for (size_t i = 0; i < node->get_input_size(); i++) + for (size_t i = 0; i < node->get_input_size(); i++) { ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->input(i), {M_block, N_block}); + } ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->output(0), {M_block, N_block}); diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp index b64522154adc9e..7f281adcec80aa 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp @@ -39,17 +39,20 @@ bool FuseTPPToEquations::fuse_from_root(const NodePtr& root, const std::shared_p // Note: we don't support exprs with more than 1 output yet. It's a technical limitation, but there are no use cases const auto tpp_root = get_tpp_op(root); - if (!tpp_root || !supported_num_out(root->output(0))) + if (!tpp_root || !supported_num_out(root->output(0))) { return false; + } const auto root_subtensor = PortDescriptorUtils::get_port_descriptor_ptr(root->output(0))->get_subtensor(); auto supported_subtensor = [&root_subtensor](const snippets::VectorDims& subtensor) { const auto size = subtensor.size(); - if (size != root_subtensor.size()) + if (size != root_subtensor.size()) { return false; + } for (size_t i = 0; i < size; i++) { - if (subtensor[i] != root_subtensor[i] && subtensor[i] != 1) + if (subtensor[i] != root_subtensor[i] && subtensor[i] != 1) { return false; + } } return true; }; diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp index d9485b1c6b7b9d..62c2dc28987e56 100644 --- 
a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp @@ -21,8 +21,9 @@ bool BrgemmTPPBlocking::SetBrgemmBeta::run(ov::snippets::lowered::LinearIR& line ov::snippets::lowered::LinearIR::constExprIt begin, ov::snippets::lowered::LinearIR::constExprIt end) { for (auto expr_it = begin; expr_it != end; ++expr_it) { - if (const auto brgemm = ov::as_type_ptr(expr_it->get()->get_node())) + if (const auto brgemm = ov::as_type_ptr(expr_it->get()->get_node())) { brgemm->set_beta(0); + } } return true; } diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp index 42c30bb112263c..b55ccd0905eecd 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp @@ -82,8 +82,9 @@ size_t get_leading_dim(ExpressionPort port, const snippets::lowered::LoopManager shape = port_desc->get_subtensor(); OPENVINO_ASSERT(ov::snippets::utils::is_planar_layout(layout), "Only planar layouts are supported for Buffers"); const auto rank_diff = static_cast(layout.size()) - static_cast(shape.size()); - if (rank_diff > 0) + if (rank_diff > 0) { layout.erase(layout.end() - rank_diff, layout.end()); + } } OPENVINO_ASSERT(layout.empty() || (layout.back() == layout.size() - 1 && layout.size() == shape.size()), @@ -123,16 +124,18 @@ bool SetTPPLeadingDim::run(snippets::lowered::LinearIR& linear_ir, snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SetTPPLeadingDim") - if (linear_ir.empty()) + if (linear_ir.empty()) { return false; + } bool modified = false; for (auto expr_it = begin; expr_it != end; 
expr_it++) { const auto& expr = *expr_it; const auto& node = expr->get_node(); auto tpp_expr = std::dynamic_pointer_cast(node); - if (!tpp_expr) + if (!tpp_expr) { continue; + } OPENVINO_ASSERT(tpp_expr->is_full_memory_access_op(node), "TPP Op is expected to be MemoryAccess on all ports"); diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp index 06ca575f314b4b..758afeb436deda 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp @@ -31,8 +31,9 @@ ScalarToScalarTPP::ScalarToScalarTPP() { if (dynamic_cast(in.get_node())) num_connected_tpp++; } - if (num_connected_tpp == 0) + if (num_connected_tpp == 0) { return false; + } // Note: If needed, we can support cases when scalar has TPP and non-TPP consumers if we copy the scalar. // However, this is rarely needed in practice and the assert is here to flag invalid configurations. 
OPENVINO_ASSERT(num_connected_tpp == target_ins.size(), "Either all or none Scalar outputs should be TPP"); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index da61917a146db0..18bc8dd5a61ec1 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -195,16 +195,19 @@ bool Transformations::is_decompression_multiply(const_node_ptr& node) const { }; const auto consumers = node->get_output_target_inputs(0); - if (all_has_type(consumers, ov::opset1::MatMul::get_type_info_static())) + if (all_has_type(consumers, ov::opset1::MatMul::get_type_info_static())) { return true; + } auto are_converts_from_decompression = [&all_has_type](const std::set>& consumers) { - if (!all_has_type(consumers, ov::opset1::Convert::get_type_info_static())) + if (!all_has_type(consumers, ov::opset1::Convert::get_type_info_static())) { return false; + } for (const auto& consumer : consumers) { const auto child_consumers = consumer.get_node()->get_output_target_inputs(0); - if (!all_has_type(child_consumers, ov::opset1::MatMul::get_type_info_static())) + if (!all_has_type(child_consumers, ov::opset1::MatMul::get_type_info_static())) { return false; + } } return true; }; @@ -223,12 +226,14 @@ bool Transformations::is_decompression_multiply(const_node_ptr& node) const { bool Transformations::fuse_type_to_fq(const std::shared_ptr& node, const precisions_map& precisions) { auto fq = ov::as_type_ptr(node); - if (!fq) + if (!fq) { return false; + } const auto& from = node->get_output_element_type(0); auto it = precisions.find(from); - if (it == precisions.end()) + if (it == precisions.end()) { return false; + } const auto& to = it->second; for (size_t i = 0; i < node->get_input_size(); ++i) { @@ -251,8 +256,9 @@ bool Transformations::fuse_type_to_fq(const std::shared_ptr& node, con bool 
Transformations::fuse_type_to_pa(const std::shared_ptr& node, const precisions_map& precisions) { auto pa = ov::as_type_ptr(node); - if (!pa) + if (!pa) { return false; + } // PagedAttentionExtension's 2nd output type should be kept f32. // The reason is that the pagedattention node in CPU plugin hardcodes 2nd output type as f32. // So, set f32 to the 2nd output type, which can avoid extra data type conversion during transformation. @@ -262,12 +268,14 @@ bool Transformations::fuse_type_to_pa(const std::shared_ptr& node, con bool Transformations::fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions) { auto convert = ov::as_type_ptr(node); - if (!convert) + if (!convert) { return false; + } const auto& from = node->get_output_element_type(0); auto it = precisions.find(from); - if (it == precisions.end()) + if (it == precisions.end()) { return false; + } const auto& to = it->second; if (convert->get_convert_element_type() == ov::element::boolean && to.is_integral_number()) { @@ -318,8 +326,9 @@ void Transformations::UpToLpt() { PreLpt(defaultPrecisions); - if (useLpt) + if (useLpt) { Lpt(defaultPrecisions); + } } void Transformations::CpuSpecificOpSet(void) { @@ -394,8 +403,9 @@ void Transformations::PreLpt(const std::vector& defaultPrecis {ov::element::u4, ov::element::u8}}; // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? - if (!hasHardwareSupport(ov::element::bf16)) + if (!hasHardwareSupport(ov::element::bf16)) { map.insert({ov::element::bf16, ov::element::f32}); + } // TODO: Remove 'hasHardwareSupport' when all nodes are able to handle f16 properly. 
if (!one_of(config.inferencePrecision, element::f16, element::undefined) || !hasHardwareSupport(element::f16)) { map.insert({ov::element::f16, ov::element::f32}); @@ -625,8 +635,9 @@ void Transformations::PreLpt(const std::vector& defaultPrecis if (concurrency > snippets_work_amount) return false; size_t spatial_dim = 1; - for (size_t i = 2; i < shape.size(); ++i) + for (size_t i = 2; i < shape.size(); ++i) { spatial_dim = spatial_dim * shape[i]; + } size_t snippets_tensor_size = spatial_dim * shape[1] / num_groups * node->get_element_type().size(); size_t cache_size_l1 = dnnl::utils::get_cache_size(1, true); if (snippets_tensor_size > cache_size_l1) { @@ -910,8 +921,9 @@ void Transformations::PostLpt() { MLPFusion); size_t concurrency = config.streamExecutorConfig.get_threads_per_stream(); - if (concurrency == 0) + if (concurrency == 0) { concurrency = parallel_get_max_threads(); + } CPU_REGISTER_PASS_X64(postLPTPassManager, QKVProjFusion); CPU_SET_CALLBACK_X64( @@ -973,8 +985,9 @@ void Transformations::MainSnippets(void) { return false; }; - if (config.snippetsMode == Config::SnippetsMode::Disable || !is_supported_isa()) + if (config.snippetsMode == Config::SnippetsMode::Disable || !is_supported_isa()) { return; + } // TODO [123659] Implement common logic to split optimization and limitation conditions const auto ignoreCallback = config.snippetsMode == Config::SnippetsMode::IgnoreCallback; @@ -987,8 +1000,9 @@ void Transformations::MainSnippets(void) { // Plugin disables Transpose tokenization on output bool mha_token_enable_transpose_on_output = one_of(config.inferencePrecision, element::f32, element::undefined); size_t concurrency = config.streamExecutorConfig.get_threads_per_stream(); - if (concurrency == 0) + if (concurrency == 0) { concurrency = parallel_get_max_threads(); + } // Runtime caching should be enabled in case of dynamic Subgraphs in CPU Plugin: to reduce overheads of // ShapeInference and CodeGeneration If runtime cache capacity is zero, it means 
that rtCache won't be used and we @@ -1051,8 +1065,9 @@ void Transformations::MainSnippets(void) { #if defined(OPENVINO_ARCH_X86_64) auto is_supported_matmul = [this](const std::shared_ptr& n) { const auto matmul = ov::as_type_ptr(n); - if (!matmul) + if (!matmul) { return false; + } const auto in_type0 = matmul->get_input_element_type(0); const auto in_type1 = matmul->get_input_element_type(1); const auto is_fp32 = (in_type0 == ov::element::f32 && in_type1 == ov::element::f32 && @@ -1064,26 +1079,32 @@ void Transformations::MainSnippets(void) { ((in_type0 == element::f32 && in_type1 == ov::element::f32 && config.inferencePrecision == ov::element::bf16)); const auto is_int8 = in_type0 == ov::element::i8; - if (matmul->get_transpose_a()) + if (matmul->get_transpose_a()) { return false; - if (is_fp32) + } + if (is_fp32) { return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2); - if (is_int8) + } + if (is_int8) { return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni); - if (is_bf16) + } + if (is_bf16) { return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) || dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16); - if (is_fp16) + } + if (is_fp16) { return dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16); + } return true; }; auto is_unsupported_parallel_work_amount = [&](const std::shared_ptr& n, const ov::PartialShape& shape) { // SplitDimensionM transformation doesn't support dynamic shapes, so M dim is split in runtime configurator - if (shape.is_dynamic()) + if (shape.is_dynamic()) { return false; + } const auto parallel_work_amount = std::accumulate(shape.rbegin() + 2, shape.rend(), ov::Dimension(1), std::multiplies()); // Ticket 160154: enable tokenization for MHA with insufficient parallel work amount @@ -1146,8 +1167,9 @@ void 
Transformations::MainSnippets(void) { if (!ignoreCallback) { // Check for supported ranks // todo: clarify whether we can evaluate snippets on inputs with larger ranks - if (t.get_partial_shape().rank().get_length() > 6) + if (t.get_partial_shape().rank().get_length() > 6) { return false; + } } return supported_element_types.count(t.get_element_type()) != 0 || @@ -1220,10 +1242,11 @@ void Transformations::MainSnippets(void) { auto mm_supports_transpose_b = [this, ignoreCallback](const std::shared_ptr& n) { MAYBE_UNUSED(config.inferencePrecision); - if (!ignoreCallback) + if (!ignoreCallback) { return false; - // Note: BrgemmTPP doesn't support transposed KN natively - // so we should extract transposes for the corresponding matmul nodes + } + // Note: BrgemmTPP doesn't support transposed KN natively + // so we should extract transposes for the corresponding matmul nodes #if defined(SNIPPETS_LIBXSMM_TPP) // TPP doesn't support dynamic shapes -> there will be BrgemmCPU node if (n->is_dynamic()) @@ -1282,8 +1305,9 @@ void Transformations::PostSnippets(void) { void Transformations::Snippets(void) { const bool useSnippets = config.snippetsMode != Config::SnippetsMode::Disable && CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Snippets); - if (!useSnippets) + if (!useSnippets) { return; + } CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets); MainSnippets(); diff --git a/src/plugins/intel_cpu/src/transformations/utils.cpp b/src/plugins/intel_cpu/src/transformations/utils.cpp index c0167aaff2f0ce..52f44cef47ee27 100644 --- a/src/plugins/intel_cpu/src/transformations/utils.cpp +++ b/src/plugins/intel_cpu/src/transformations/utils.cpp @@ -16,27 +16,32 @@ bool has_matmul_with_compressed_weights(const std::shared_ptr& bool has_decompression_multiply = false; auto is_decompression_multiply = [&](ov::Node* node) { if (auto multiply = ov::as_type(node)) { - if (ov::is_dequantization_node(multiply->shared_from_this())) + if 
(ov::is_dequantization_node(multiply->shared_from_this())) { has_decompression_multiply = true; + } } }; for (const auto& op : model->get_ops()) { - if (!ov::is_type(op) && !ov::is_type(op)) + if (!ov::is_type(op) && !ov::is_type(op)) { continue; + } - if (!op->get_input_element_type(0).is_real()) + if (!op->get_input_element_type(0).is_real()) { continue; + } auto weights = op->input_value(1); - if (!ov::op::util::is_on_constant_path(weights)) + if (!ov::op::util::is_on_constant_path(weights)) { continue; + } std::unordered_set visited; ov::op::util::visit_constant_path(weights.get_node(), visited, is_decompression_multiply); - if (has_decompression_multiply) + if (has_decompression_multiply) { return true; + } } return false; } diff --git a/src/plugins/intel_cpu/src/utils/bfloat16.hpp b/src/plugins/intel_cpu/src/utils/bfloat16.hpp index 1f7ffdc10af062..f2c7c3b47e86f0 100644 --- a/src/plugins/intel_cpu/src/utils/bfloat16.hpp +++ b/src/plugins/intel_cpu/src/utils/bfloat16.hpp @@ -36,7 +36,7 @@ class bfloat16_t { {} operator float() const { - return F32{uint32_t(m_value) << 16}.vfloat; + return F32{static_cast(m_value) << 16}.vfloat; } static constexpr bfloat16_t from_bits(uint16_t bits) { return bfloat16_t(bits, true); diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.cpp b/src/plugins/intel_cpu/src/utils/blob_dump.cpp index abb321791aa1d8..b2788085c549f7 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.cpp +++ b/src/plugins/intel_cpu/src/utils/blob_dump.cpp @@ -32,10 +32,10 @@ struct IEB_HEADER { unsigned char scaling_axis; // FF - no scaling unsigned char reserved[3]; - unsigned long data_offset; - unsigned long data_size; - unsigned long scaling_data_offset; - unsigned long scaling_data_size; + uint64_t data_offset; + uint64_t data_size; + uint64_t scaling_data_offset; + uint64_t scaling_data_size; }; static IEB_HEADER prepare_header(const MemoryDesc& desc) { @@ -52,13 +52,15 @@ static IEB_HEADER prepare_header(const MemoryDesc& desc) { 
header.precision = static_cast(ov::element::Type_t(desc.getPrecision())); - if (desc.getShape().getRank() > 7) + if (desc.getShape().getRank() > 7) { OPENVINO_THROW("Dumper support max 7D blobs"); + } header.ndims = desc.getShape().getRank(); const auto& dims = desc.getShape().getStaticDims(); - for (int i = 0; i < header.ndims; i++) + for (int i = 0; i < header.ndims; i++) { header.dims[i] = dims[i]; + } header.scaling_axis = NO_SCALES; @@ -67,16 +69,19 @@ static IEB_HEADER prepare_header(const MemoryDesc& desc) { static DnnlBlockedMemoryDesc parse_header(IEB_HEADER& header) { if (header.magic[0] != IEB_MAGIC[0] || header.magic[1] != IEB_MAGIC[1] || header.magic[2] != IEB_MAGIC[2] || - header.magic[3] != IEB_MAGIC[3]) + header.magic[3] != IEB_MAGIC[3]) { OPENVINO_THROW("Dumper cannot parse file. Wrong format."); + } - if (header.ver[0] != 0 || header.ver[1] != 1) + if (header.ver[0] != 0 || header.ver[1] != 1) { OPENVINO_THROW("Dumper cannot parse file. Unsupported IEB format version."); + } const auto prc = static_cast(header.precision); VectorDims dims(header.ndims); - for (int i = 0; i < header.ndims; i++) + for (int i = 0; i < header.ndims; i++) { dims[i] = header.dims[i]; + } return DnnlBlockedMemoryDesc{prc, Shape(dims)}; } @@ -101,30 +106,34 @@ void BlobDumper::prepare_plain_data(const MemoryPtr& memory, std::vector(data.data()); auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; + } break; } case ov::element::bf16: { auto* pln_blob_ptr = reinterpret_cast(data.data()); auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; + } break; } case ov::element::f16: { auto* pln_blob_ptr = reinterpret_cast(data.data()); auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; 
i < data_size; i++) { pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; + } break; } case ov::element::i8: case ov::element::u8: { auto* pln_blob_ptr = reinterpret_cast(data.data()); auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; + } break; } default: @@ -133,8 +142,9 @@ void BlobDumper::prepare_plain_data(const MemoryPtr& memory, std::vectorgetDesc()); std::vector data; @@ -150,8 +160,9 @@ void BlobDumper::dump(std::ostream& stream) const { } void BlobDumper::dumpAsTxt(std::ostream& stream) const { - if (memory == nullptr) + if (memory == nullptr) { OPENVINO_THROW("Dumper cannot dump. Memory is not allocated."); + } const auto& desc = memory->getDesc(); const auto dims = desc.getShape().getStaticDims(); @@ -160,24 +171,27 @@ void BlobDumper::dumpAsTxt(std::ostream& stream) const { // Header like "U8 4D shape: 2 3 224 224 () stream << memory->getDesc().getPrecision().get_type_name() << " " << dims.size() << "D " << "shape: "; - for (size_t d : dims) + for (size_t d : dims) { stream << d << " "; + } stream << "(" << data_size << ")" - << " by address 0x" << std::hex << memory->getDataAs() << std::dec << std::endl; + << " by address 0x" << std::hex << memory->getDataAs() << std::dec << std::endl; const void* ptr = memory->getData(); switch (desc.getPrecision()) { case ov::element::f32: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } case ov::element::i32: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } case ov::element::bf16: { @@ -190,44 +204,51 @@ void BlobDumper::dumpAsTxt(std::ostream& stream) const { } case ov::element::f16: { auto* 
blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } case ov::element::i8: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; + } break; } case ov::element::u8: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; + } break; } case ov::element::i64: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } case ov::element::u32: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } case ov::element::u16: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } case ov::element::i16: { auto* blob_ptr = reinterpret_cast(ptr); - for (size_t i = 0; i < data_size; i++) + for (size_t i = 0; i < data_size; i++) { stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + } break; } default: @@ -252,8 +273,9 @@ BlobDumper BlobDumper::read(std::istream& stream) { BlobDumper BlobDumper::read(const std::string& file_path) { std::ifstream file; file.open(file_path); - if (!file.is_open()) + if (!file.is_open()) { OPENVINO_THROW("Dumper cannot open file ", file_path); + } auto res = read(file); file.close(); @@ -263,8 +285,9 @@ BlobDumper BlobDumper::read(const std::string& file_path) { void BlobDumper::dump(const std::string& dump_path) 
const { std::ofstream dump_file; dump_file.open(dump_path, std::ios::binary); - if (!dump_file.is_open()) + if (!dump_file.is_open()) { OPENVINO_THROW("Dumper cannot create dump file ", dump_path); + } dump(dump_file); dump_file.close(); @@ -273,8 +296,9 @@ void BlobDumper::dump(const std::string& dump_path) const { void BlobDumper::dumpAsTxt(const std::string& dump_path) const { std::ofstream dump_file; dump_file.open(dump_path); - if (!dump_file.is_open()) + if (!dump_file.is_open()) { OPENVINO_THROW("Dumper cannot create dump file ", dump_path); + } dumpAsTxt(dump_file); dump_file.close(); diff --git a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp index 059f67b6e3a0e3..dac0c2f1c10bb2 100644 --- a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp +++ b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp @@ -37,8 +37,9 @@ struct is_any_of * @return normalized vector */ inline std::vector getNormalizedDimsBySize(const VectorDims& dims, size_t ndims) { - if (dims.size() >= ndims) + if (dims.size() >= ndims) { return dims; + } std::vector normalizedDims = dims; for (size_t i = 0; i < (ndims - dims.size()); i++) { @@ -63,23 +64,30 @@ inline bool isPerTensorOrPerChannelBroadcastable(const VectorDims& firstInputDim bool weakComparison = false) { bool (*dimsEqual)(size_t, size_t) = weakComparison ? 
static_cast(dimsEqualWeak) : static_cast(dimsEqualStrong); - if (secondInputDims.size() > firstInputDims.size()) + if (secondInputDims.size() > firstInputDims.size()) { return false; - if (std::accumulate(secondInputDims.begin(), secondInputDims.end(), size_t(1), std::multiplies()) == 1) + } + if (std::accumulate(secondInputDims.begin(), + secondInputDims.end(), + static_cast(1), + std::multiplies()) == 1) { return true; + } std::vector normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size()); if (channelAxis >= 0) { for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) { if ((i == static_cast(channelAxis) && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[i])) || - (i != static_cast(channelAxis) && normalizedSecondInputDims[i] != 1)) + (i != static_cast(channelAxis) && normalizedSecondInputDims[i] != 1)) { return false; + } } } else { for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) { - if (normalizedSecondInputDims[i] != 1) + if (normalizedSecondInputDims[i] != 1) { return false; + } } } return true; @@ -95,8 +103,9 @@ inline ov::element::Type normalizeToSupportedPrecision(ov::element::Type precisi switch (precision) { case ov::element::bf16: case ov::element::f16: { - if (!hasHardwareSupport(precision)) + if (!hasHardwareSupport(precision)) { precision = ov::element::f32; + } } case ov::element::u8: case ov::element::i8: diff --git a/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp b/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp index f35c7bb8b60711..e3dee961341f1f 100644 --- a/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp +++ b/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp @@ -36,8 +36,9 @@ inline std::string getImplPriorityValue(const std::shared_ptr& node) { template inline const std::shared_ptr getNgraphOpAs(const std::shared_ptr& op) { auto typedOp = ov::as_type_ptr(op); - if (!typedOp) + if (!typedOp) { OPENVINO_THROW("Can't get ngraph node ", op->get_type_name(), " with name ", 
op->get_friendly_name()); + } return typedOp; } diff --git a/src/plugins/intel_cpu/src/utils/plain_tensor.hpp b/src/plugins/intel_cpu/src/utils/plain_tensor.hpp index a27f29c0ab0e1b..c9b610a8f567af 100644 --- a/src/plugins/intel_cpu/src/utils/plain_tensor.hpp +++ b/src/plugins/intel_cpu/src/utils/plain_tensor.hpp @@ -110,8 +110,9 @@ struct PlainTensor { } size_t size(int i) const { - if (i < 0) + if (i < 0) { i += m_rank; + } assert(static_cast::type>(i) < m_rank); return m_dims[i]; } @@ -127,8 +128,9 @@ struct PlainTensor { template std::vector get_strides() const { std::vector strides(m_rank); - for (size_t i = 0; i < m_rank; i++) + for (size_t i = 0; i < m_rank; i++) { strides[i] = static_cast(m_strides[i]); + } return strides; } @@ -187,14 +189,17 @@ struct PlainTensor { tensor_index(int start, int end = INT_MIN, int step = 1) : start(start), end(end), step(step) {} void regularize(int size) { - if (start < 0) + if (start < 0) { start += size; + } assert(start >= 0 && start < size); if (end != INT_MIN) { - if (end < 0) + if (end < 0) { end += size; - if (end > size) + } + if (end > size) { end = size; + } assert(end >= 0 && end <= size); count = (end - start + step - 1) / step; } else { @@ -272,8 +277,9 @@ struct PlainTensor { // check if it's dense tensor size_t stride = 1; for (int i = m_rank - 1; i >= 0; i--) { - if (m_strides[i] != stride) + if (m_strides[i] != stride) { return false; + } stride *= m_dims[i]; } return true; @@ -428,8 +434,9 @@ struct PlainTensor { for (int i = m_rank - 1; i >= 0; i--) { if (index[i] >= m_dims[i]) { // carry on - if (i == 0) + if (i == 0) { return *this; + } index[i] = 0; index[i - 1]++; } @@ -453,8 +460,9 @@ struct PlainTensor { match = true; auto it = expect_dims.begin(); for (size_t i = 0; i < m_rank; ++i, ++it) { - if (*it == 0 && special_zero) + if (*it == 0 && special_zero) { continue; + } if (*it != m_dims[i]) { match = false; break; @@ -465,8 +473,9 @@ struct PlainTensor { if (!match) { std::stringstream ss; ss << " 
m_dims=["; - for (size_t i = 0; i < m_rank; i++) + for (size_t i = 0; i < m_rank; i++) { ss << m_dims[i] << ","; + } ss << "] expect_dims=["; for (auto& i : expect_dims) ss << i << ","; @@ -498,8 +507,9 @@ struct PlainTensor { sep = ","; } ss << "] {"; - if (m_rank > 1) + if (m_rank > 1) { ss << "\n"; + } auto last_dim_size = m_dims[m_rank - 1]; int row_id = 0; int cur_row_lines_left = lines_per_row; @@ -515,32 +525,34 @@ struct PlainTensor { // display current element if we still have buget if (cur_row_lines_left > 0) { - if (m_dt == ov::element::Type_t::f32) + if (m_dt == ov::element::Type_t::f32) { ss << (ptr())[i] << ","; - else if (m_dt == ov::element::Type_t::bf16) + } else if (m_dt == ov::element::Type_t::bf16) { ss << (ptr())[i] << ","; - else if (m_dt == ov::element::Type_t::f16) + } else if (m_dt == ov::element::Type_t::f16) { ss << (ptr())[i] << ","; - else if (m_dt == ov::element::Type_t::i32) + } else if (m_dt == ov::element::Type_t::i32) { ss << (ptr())[i] << ","; - else if (m_dt == ov::element::Type_t::i8) + } else if (m_dt == ov::element::Type_t::i8) { ss << (ptr())[i] << ","; - else if (m_dt == ov::element::Type_t::u8) + } else if (m_dt == ov::element::Type_t::u8) { ss << (ptr())[i] << ","; - else if (m_dt == ov::element::Type_t::boolean) + } else if (m_dt == ov::element::Type_t::boolean) { ss << static_cast((ptr())[i]) << ","; - else + } else { ss << "?,"; + } cur_line_elecnt++; cur_row_elecnt++; if (((cur_line_elecnt % 16) == 15 || (cur_row_elecnt == last_dim_size)) && (m_rank > 1)) { max_total_lines--; cur_row_lines_left--; if (cur_row_lines_left == 0) { - if (cur_row_elecnt == last_dim_size) + if (cur_row_elecnt == last_dim_size) { ss << ",\n"; - else + } else { ss << "...\n"; + } cur_row_elecnt = 0; } else { ss << "\n\t\t"; diff --git a/src/plugins/intel_cpu/src/utils/precision_support.cpp b/src/plugins/intel_cpu/src/utils/precision_support.cpp index 64c107d578e7a7..48c5cb826d297f 100644 --- 
a/src/plugins/intel_cpu/src/utils/precision_support.cpp +++ b/src/plugins/intel_cpu/src/utils/precision_support.cpp @@ -20,8 +20,9 @@ namespace intel_cpu { static bool hasFP16HardwareSupport(const ov::element::Type& precision) { #if defined(OPENVINO_ARCH_X86_64) if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16) || - dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) { return true; + } return false; #elif defined(OV_CPU_WITH_ACL) return arm_compute::CPUInfo::get().has_fp16(); @@ -33,8 +34,9 @@ static bool hasFP16HardwareSupport(const ov::element::Type& precision) { static bool hasBF16HardwareSupport(const ov::element::Type& precision) { #if defined(OPENVINO_ARCH_X86_64) if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) || - dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) { return true; + } return false; #else return false; @@ -53,10 +55,12 @@ bool hasHardwareSupport(const ov::element::Type& precision) { } ov::element::Type defaultFloatPrecision() { - if (hasHardwareSupport(ov::element::f16)) + if (hasHardwareSupport(ov::element::f16)) { return ov::element::f16; - if (hasHardwareSupport(ov::element::bf16)) + } + if (hasHardwareSupport(ov::element::bf16)) { return ov::element::bf16; + } return ov::element::f32; } diff --git a/src/plugins/intel_cpu/src/weights_cache.cpp b/src/plugins/intel_cpu/src/weights_cache.cpp index 5c4caaeec257c0..d9c55c2b253b8d 100644 --- a/src/plugins/intel_cpu/src/weights_cache.cpp +++ b/src/plugins/intel_cpu/src/weights_cache.cpp @@ -60,8 +60,9 @@ WeightsSharing::SharedMemory::Ptr WeightsSharing::get(const std::string& key) co std::unique_lock lock(guard); auto found = sharedWeights.find(key); - if (found == sharedWeights.end() || !((ptr = found->second) && (newPtr = ptr->sharedMemory.lock()))) + if (found == sharedWeights.end() || 
!((ptr = found->second) && (newPtr = ptr->sharedMemory.lock()))) { OPENVINO_THROW("Unknown shared memory with key ", key); + } } return std::make_shared(ptr->valid.load(std::memory_order_relaxed) ? std::unique_lock(ptr->guard, std::defer_lock) @@ -72,21 +73,24 @@ WeightsSharing::SharedMemory::Ptr WeightsSharing::get(const std::string& key) co SocketsWeights::SocketsWeights() { int num_sockets = get_num_sockets(); - for (int socket_id = 0; socket_id < num_sockets; socket_id++) + for (int socket_id = 0; socket_id < num_sockets; socket_id++) { _cache_map[socket_id] = std::make_shared(); + } } WeightsSharing::Ptr& SocketsWeights::operator[](int socket_id) { auto found = _cache_map.find(socket_id); - if (found == _cache_map.end()) + if (found == _cache_map.end()) { OPENVINO_THROW("Unknown socket id ", socket_id); + } return found->second; } const WeightsSharing::Ptr& SocketsWeights::operator[](int socket_id) const { auto found = _cache_map.find(socket_id); - if (found == _cache_map.end()) + if (found == _cache_map.end()) { OPENVINO_THROW("Unknown socket id ", socket_id); + } return found->second; }