diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 3969da2b97c5a1..97b399b1abf48d 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -24,9 +24,11 @@ jobs: sudo apt update sudo apt --assume-yes install clang-format-15 - # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector + # Run cmake with extra options to cover as much source code as possible: + # - -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT to enable codestyle check for ITT collector + # - -DENABLE_SNIPPETS_LIBXSMM_TPP to cover snippets TPP adaptation - name: CMake configure - run: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -B build + run: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -B build - name: Create code style diff run: cmake --build build --target clang_format_fix_all -j8 @@ -54,9 +56,11 @@ jobs: sudo apt update sudo apt --assume-yes install binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu scons clang-format-15 - # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector + # Run cmake with extra options to cover as much source code as possible: + # - -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT to enable codestyle check for ITT collector + # - -DENABLE_SNIPPETS_LIBXSMM_TPP to cover snippets TPP adaptation - name: CMake configure - run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64 + run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64 - name: Create code style diff run: cmake --build build_arm64 --target clang_format_fix_all -j8 diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp index 0fcb394a8a5bde..e873d7f7aa98eb 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp @@ -3,6 +3,7 @@ // #include "jit_brgemm_emitter.hpp" + #include "emitters/snippets/x64/jit_snippets_emitters.hpp" #include "transformations/tpp/x64/op/brgemm.hpp" @@ -28,18 +29,15 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres const auto& input_1_desc = expr->get_input_port_descriptor(1); const auto& output_desc = expr->get_output_port_descriptor(0); - std::vector leading_dimensions {brgemm_node->get_input_stride(0), - brgemm_node->get_input_stride(1), - brgemm_node->get_output_stride(0)}; + std::vector leading_dimensions{brgemm_node->get_input_stride(0), + brgemm_node->get_input_stride(1), + brgemm_node->get_output_stride(0)}; auto in_0_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0)); auto in_1_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1)); - exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ? - LIBXSMM_DATATYPE_I32 : - LIBXSMM_DATATYPE_F32; - auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ? - LIBXSMM_DATATYPE_I32 : - LIBXSMM_DATATYPE_F32; + exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ? LIBXSMM_DATATYPE_I32 + : LIBXSMM_DATATYPE_F32; + auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ? LIBXSMM_DATATYPE_I32 : LIBXSMM_DATATYPE_F32; const auto beta = brgemm_node->get_beta(); OV_CPU_JIT_EMITTER_ASSERT(beta == 0 || beta == 1, "Detected unsupported beta value: " + std::to_string(beta)); @@ -54,18 +52,14 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres const auto N = static_cast(*subtensor_in1.rbegin()); const bool is_f32_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_F32; - const bool is_bf16_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_BF16; + const bool is_bf16_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_BF16; const bool is_i8_gemm = in_0_prec == LIBXSMM_DATATYPE_U8 || in_0_prec == LIBXSMM_DATATYPE_I8; - OV_CPU_JIT_EMITTER_ASSERT(is_f32_gemm || - (is_bf16_gemm && K % 2 == 0) || - (is_i8_gemm && K % 4 == 0), + OV_CPU_JIT_EMITTER_ASSERT(is_f32_gemm || (is_bf16_gemm && K % 2 == 0) || (is_i8_gemm && K % 4 == 0), "Unsupported parameter combination for kernel configuration"); - m_compile_flags = is_f32_gemm ? - LIBXSMM_GEMM_FLAGS('N', 'N') : - LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N') | - LIBXSMM_GEMM_FLAG_NO_SETUP_TILECONFIG | - LIBXSMM_GEMM_FLAG_NO_RESET_TILECONFIG; + m_compile_flags = is_f32_gemm ? LIBXSMM_GEMM_FLAGS('N', 'N') + : LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N') | + LIBXSMM_GEMM_FLAG_NO_SETUP_TILECONFIG | LIBXSMM_GEMM_FLAG_NO_RESET_TILECONFIG; if (beta == 0) m_compile_flags |= LIBXSMM_GEMM_FLAG_BETA_0; @@ -79,9 +73,15 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres m_compile_flags |= LIBXSMM_GEMM_FLAG_B_UNSIGNED; } - m_shape = libxsmm_create_gemm_shape(N, M, K, - io_strides[1], io_strides[0], io_strides[2], - in_1_prec, in_0_prec, out_0_prec, + m_shape = libxsmm_create_gemm_shape(N, + M, + K, + io_strides[1], + io_strides[0], + io_strides[2], + in_1_prec, + in_0_prec, + out_0_prec, exec_dtype); m_prefetching_flags = LIBXSMM_GEMM_PREFETCH_NONE; } @@ -91,7 +91,7 @@ std::set> BrgemmTppEmitter::get_supported_precisions( return {{element::f32, element::f32}}; } -void BrgemmTppEmitter::validate_arguments(const std::vector &in, const std::vector &out) const { +void BrgemmTppEmitter::validate_arguments(const std::vector& in, const std::vector& out) const { OV_CPU_JIT_EMITTER_ASSERT(in.size() == 2, "Expects 2 input regs, got" + std::to_string(in.size())); OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Expects 1 output reg, got" + std::to_string(out.size())); } @@ -100,7 +100,7 @@ const uintptr_t BrgemmTppEmitter::get_compiled_kernel_ptr() const { return COMPILE_TPP_KERNEL(libxsmm_dispatch_gemm(m_shape, m_compile_flags, m_prefetching_flags)); } -void BrgemmTppEmitter::execute_brgemm_kernel(libxsmm_gemmfunction brg_kernel, void *in0, void *in1, void *out0) { +void BrgemmTppEmitter::execute_brgemm_kernel(libxsmm_gemmfunction brg_kernel, void* in0, void* in1, void* out0) { libxsmm_gemm_param gemm_p; gemm_p.a.primary = in1; gemm_p.b.primary = in0; diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.cpp index 5b156100073534..fa38eacb4e870b 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.cpp @@ -3,6 +3,7 @@ // #include "jit_scalar_emitter.hpp" + #include "emitters/snippets/x64/jit_snippets_emitters.hpp" using namespace Xbyak; diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.hpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.hpp index e8235f6b86e88c..c59153fdb3aaec 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.hpp @@ -3,8 +3,8 @@ // #pragma once -#include "snippets/lowered/expression.hpp" #include "emitters/plugin/x64/jit_emitter.hpp" +#include "snippets/lowered/expression.hpp" namespace ov { namespace intel_cpu { @@ -13,11 +13,16 @@ class ScalarTppEmitter : public jit_emitter { ScalarTppEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const ov::snippets::lowered::ExpressionPtr& expr); - size_t get_inputs_num() const override {return 0;} - size_t aux_gprs_count() const override {return 1;} + size_t get_inputs_num() const override { + return 0; + } + size_t aux_gprs_count() const override { + return 1; + } + private: void emit_impl(const std::vector& in, const std::vector& out) const override; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp index d9f0bc947db958..4c5f2925ef0735 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp @@ -3,50 +3,60 @@ // #include "brgemm.hpp" + #include "snippets/itt.hpp" -#include "snippets/utils/utils.hpp" #include "snippets/lowered/port_descriptor.hpp" +#include "snippets/utils/utils.hpp" #include "utils/general_utils.h" - namespace ov { namespace intel_cpu { namespace tpp { namespace op { -BrgemmTPP::BrgemmTPP(const Output& A, const Output& B, - const size_t offset_a, const size_t offset_b, const size_t offset_c, - std::vector layout_a, std::vector layout_b, std::vector layout_c, +BrgemmTPP::BrgemmTPP(const Output& A, + const Output& B, + const size_t offset_a, + const size_t offset_b, + const size_t offset_c, + std::vector layout_a, + std::vector layout_b, + std::vector layout_c, const float beta) : MemoryAccess(std::set{0, 1}, std::set{0}), modifier::TensorProcessingPrimitive(), - Brgemm(A, B, - offset_a, offset_b, offset_c, - std::move(layout_a), std::move(layout_b), std::move(layout_c)) { + Brgemm(A, B, offset_a, offset_b, offset_c, std::move(layout_a), std::move(layout_b), std::move(layout_c)) { set_beta(beta); } -BrgemmTPP::BrgemmTPP(const Output& A, const Output& B, - const PortDescriptor& desc_a, const PortDescriptor& desc_b, const PortDescriptor& desc_c, - std::vector layout_a, std::vector layout_b, std::vector layout_c, +BrgemmTPP::BrgemmTPP(const Output& A, + const Output& B, + const PortDescriptor& desc_a, + const PortDescriptor& desc_b, + const PortDescriptor& desc_c, + std::vector layout_a, + std::vector layout_b, + std::vector layout_c, const float beta) : MemoryAccess(PortMap{{0, desc_a}, {1, desc_b}}, PortMap{{0, desc_c}}), modifier::TensorProcessingPrimitive(), - Brgemm(A, B, - desc_a, desc_b, desc_c, - std::move(layout_a), std::move(layout_b), std::move(layout_c)) { + Brgemm(A, B, desc_a, desc_b, desc_c, std::move(layout_a), std::move(layout_b), std::move(layout_c)) { set_beta(beta); } std::shared_ptr BrgemmTPP::clone_with_new_inputs(const OutputVector& new_args) const { INTERNAL_OP_SCOPE(BrgemmTPP_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), - get_input_port_descriptor(0), get_input_port_descriptor(1), get_output_port_descriptor(0), - snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(0))->get_layout(), - snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(1))->get_layout(), - snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(output(0))->get_layout(), - m_beta); + return std::make_shared( + new_args.at(0), + new_args.at(1), + get_input_port_descriptor(0), + get_input_port_descriptor(1), + get_output_port_descriptor(0), + snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(0))->get_layout(), + snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(1))->get_layout(), + snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(output(0))->get_layout(), + m_beta); } bool BrgemmTPP::visit_attributes(AttributeVisitor& visitor) { @@ -55,7 +65,7 @@ bool BrgemmTPP::visit_attributes(AttributeVisitor& visitor) { return Brgemm::visit_attributes(visitor); } -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp index c9199c3c7f82df..4a147f79b2a37e 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp @@ -4,8 +4,8 @@ #pragma once -#include "transformations/snippets/x64/op/brgemm_cpu.hpp" #include "modifiers.hpp" +#include "transformations/snippets/x64/op/brgemm_cpu.hpp" namespace ov { namespace intel_cpu { @@ -22,13 +22,23 @@ class BrgemmTPP : virtual public modifier::TensorProcessingPrimitive, public sni public: OPENVINO_OP("Brgemm", "TppOpset", snippets::op::Brgemm); - BrgemmTPP(const Output& A, const Output& B, - size_t offset_a = 0, size_t offset_b = 0, size_t offset_c = 0, - std::vector layout_a = {}, std::vector layout_b = {}, std::vector layout_c = {}, + BrgemmTPP(const Output& A, + const Output& B, + size_t offset_a = 0, + size_t offset_b = 0, + size_t offset_c = 0, + std::vector layout_a = {}, + std::vector layout_b = {}, + std::vector layout_c = {}, float beta = 1); - BrgemmTPP(const Output& A, const Output& B, - const PortDescriptor& desc_a, const PortDescriptor& desc_b, const PortDescriptor& desc_c, - std::vector layout_a = {}, std::vector layout_b = {}, std::vector layout_c = {}, + BrgemmTPP(const Output& A, + const Output& B, + const PortDescriptor& desc_a, + const PortDescriptor& desc_b, + const PortDescriptor& desc_c, + std::vector layout_a = {}, + std::vector layout_b = {}, + std::vector layout_c = {}, float beta = 1); BrgemmTPP() = default; @@ -36,14 +46,18 @@ class BrgemmTPP : virtual public modifier::TensorProcessingPrimitive, public sni bool visit_attributes(AttributeVisitor& visitor) override; - float get_beta() const { return m_beta; } - void set_beta(float beta) { m_beta = beta; } + float get_beta() const { + return m_beta; + } + void set_beta(float beta) { + m_beta = beta; + } private: float m_beta = 0.f; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.cpp index a5f297a491af8b..7cfbba2aeb5be3 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.cpp @@ -11,63 +11,63 @@ namespace op { std::ostream& operator<<(std::ostream& os, const OpDescTPP& od) { switch (od.m_arity) { - case OpDescTPP::ARITY::ZERO: - os << "ARG#" << static_cast(od.m_value); + case OpDescTPP::ARITY::ZERO: + os << "ARG#" << static_cast(od.m_value); + break; + case OpDescTPP::ARITY::UNARY: + switch (static_cast(od.m_value)) { + case LIBXSMM_MELTW_TYPE_UNARY_EXP: + os << "EXP"; break; - case OpDescTPP::ARITY::UNARY: - switch (static_cast(od.m_value)) { - case LIBXSMM_MELTW_TYPE_UNARY_EXP: - os << "EXP"; - break; - case LIBXSMM_MELTW_TYPE_UNARY_X2: - os << "SQ"; - break; - case LIBXSMM_MELTW_TYPE_UNARY_SQRT: - os << "SQRT"; - break; - case LIBXSMM_MELTW_TYPE_UNARY_RELU: - os << "RELU"; - break; - case LIBXSMM_MELTW_TYPE_UNARY_RECIPROCAL: - os << "RECIPROCAL"; - break; - case LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_ADD: - os << "REDUCE_ADD"; - break; - case LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_MAX: - os << "REDUCE_MAX"; - break; - default: - OPENVINO_THROW("Unsupported TPP Unary op type for serialization"); - } + case LIBXSMM_MELTW_TYPE_UNARY_X2: + os << "SQ"; break; - case OpDescTPP::ARITY::BINARY: - switch (static_cast(od.m_value)) { - case LIBXSMM_MELTW_TYPE_BINARY_ADD: - os << "ADD"; - break; - case LIBXSMM_MELTW_TYPE_BINARY_SUB: - os << "SUB"; - break; - case LIBXSMM_MELTW_TYPE_BINARY_MUL: - os << "MUL"; - break; - case LIBXSMM_MELTW_TYPE_BINARY_DIV: - os << "DIV"; - break; - default: - OPENVINO_THROW("Unsupported TPP Binary op type for serialization"); - } + case LIBXSMM_MELTW_TYPE_UNARY_SQRT: + os << "SQRT"; break; - case OpDescTPP::ARITY::UNDEFINED: - os << "Undefined"; + case LIBXSMM_MELTW_TYPE_UNARY_RELU: + os << "RELU"; + break; + case LIBXSMM_MELTW_TYPE_UNARY_RECIPROCAL: + os << "RECIPROCAL"; + break; + case LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_ADD: + os << "REDUCE_ADD"; + break; + case LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_MAX: + os << "REDUCE_MAX"; + break; + default: + OPENVINO_THROW("Unsupported TPP Unary op type for serialization"); + } + break; + case OpDescTPP::ARITY::BINARY: + switch (static_cast(od.m_value)) { + case LIBXSMM_MELTW_TYPE_BINARY_ADD: + os << "ADD"; + break; + case LIBXSMM_MELTW_TYPE_BINARY_SUB: + os << "SUB"; + break; + case LIBXSMM_MELTW_TYPE_BINARY_MUL: + os << "MUL"; + break; + case LIBXSMM_MELTW_TYPE_BINARY_DIV: + os << "DIV"; break; default: - OPENVINO_THROW("Unhandled ARITY"); + OPENVINO_THROW("Unsupported TPP Binary op type for serialization"); + } + break; + case OpDescTPP::ARITY::UNDEFINED: + os << "Undefined"; + break; + default: + OPENVINO_THROW("Unhandled ARITY"); } return os; } -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.hpp index e6aac272a905b7..f205c5f58bde46 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/descriptor.hpp @@ -14,16 +14,20 @@ namespace op { class OpDescTPP { public: // Note: zero arity represent equation arguments - enum class ARITY {UNDEFINED, UNARY, BINARY, ZERO}; + enum class ARITY { UNDEFINED, UNARY, BINARY, ZERO }; OpDescTPP() = default; // Note: for zero arity op_type is interpreted as the argument index (op inputs and args have different order) OpDescTPP(ARITY arity, int arg_idx) : m_arity(arity), m_value{arg_idx}, m_flags{0} { OPENVINO_ASSERT(m_arity == ARITY::ZERO, "Only zero-arity op descs could be created directly"); } - explicit OpDescTPP(libxsmm_meltw_binary_type op_type, libxsmm_bitfield flags = LIBXSMM_MELTW_FLAG_BINARY_NONE) : - m_arity{ARITY::BINARY}, m_value{op_type}, m_flags{flags} {} - explicit OpDescTPP(libxsmm_meltw_unary_type op_type, libxsmm_bitfield flags = LIBXSMM_MELTW_FLAG_UNARY_NONE) : - m_arity{ARITY::UNARY}, m_value{op_type}, m_flags{flags} {} + explicit OpDescTPP(libxsmm_meltw_binary_type op_type, libxsmm_bitfield flags = LIBXSMM_MELTW_FLAG_BINARY_NONE) + : m_arity{ARITY::BINARY}, + m_value{op_type}, + m_flags{flags} {} + explicit OpDescTPP(libxsmm_meltw_unary_type op_type, libxsmm_bitfield flags = LIBXSMM_MELTW_FLAG_UNARY_NONE) + : m_arity{ARITY::UNARY}, + m_value{op_type}, + m_flags{flags} {} operator libxsmm_meltw_binary_type() const { OPENVINO_ASSERT(m_arity == ARITY::BINARY, "Unsupported TPP OpDesc conversion"); return static_cast(m_value); @@ -36,17 +40,21 @@ class OpDescTPP { OPENVINO_ASSERT(m_arity == ARITY::ZERO, "Unsupported TPP OpDesc conversion"); return m_value; } - ARITY get_arity() const { return m_arity; } - libxsmm_bitfield get_flags() const { return m_flags; } + ARITY get_arity() const { + return m_arity; + } + libxsmm_bitfield get_flags() const { + return m_flags; + } friend std::ostream& operator<<(std::ostream& os, const OpDescTPP& od); private: - const ARITY m_arity {ARITY::UNDEFINED}; - const int m_value {-1}; - const libxsmm_bitfield m_flags {0}; + const ARITY m_arity{ARITY::UNDEFINED}; + const int m_value{-1}; + const libxsmm_bitfield m_flags{0}; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp index 1df57fdd5a8f4f..44aaf251bc201f 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.cpp @@ -9,31 +9,28 @@ namespace intel_cpu { namespace tpp { namespace op { -#define GENERAL_AUX_METHODS(OP, OP_TYPE, ...) \ - std::shared_ptr OP::clone_with_new_inputs(const OutputVector& new_args) const {\ - check_new_args_count(this, new_args);\ - const auto& new_op = std::make_shared(__VA_ARGS__);\ - new_op->clone_memory_access_ports(*this);\ - return new_op;\ -} \ - bool OP::visit_attributes(AttributeVisitor& visitor) {\ - return OP_TYPE::visit_attributes(visitor);\ -}\ +#define GENERAL_AUX_METHODS(OP, OP_TYPE, ...) \ + std::shared_ptr OP::clone_with_new_inputs(const OutputVector& new_args) const { \ + check_new_args_count(this, new_args); \ + const auto& new_op = std::make_shared(__VA_ARGS__); \ + new_op->clone_memory_access_ports(*this); \ + return new_op; \ + } \ + bool OP::visit_attributes(AttributeVisitor& visitor) { return OP_TYPE::visit_attributes(visitor); } // Note: Unary Ops don't require broadcasting flags update => no need to override validate_and_infer_types -#define BINARY_AUX_METHODS(BINARY_OP, OV_OP) GENERAL_AUX_METHODS(BINARY_OP, BinaryEltwiseTPP, new_args.at(0), new_args.at(1), this->get_autob())\ -void BINARY_OP::validate_and_infer_types() {\ - OV_OP::validate_and_infer_types();\ - m_flags = get_broadcasting_flags(get_input_partial_shape(0), get_input_partial_shape(1));\ -} +#define BINARY_AUX_METHODS(BINARY_OP, OV_OP) \ + GENERAL_AUX_METHODS(BINARY_OP, BinaryEltwiseTPP, new_args.at(0), new_args.at(1), this->get_autob()) \ + void BINARY_OP::validate_and_infer_types() { \ + OV_OP::validate_and_infer_types(); \ + m_flags = get_broadcasting_flags(get_input_partial_shape(0), get_input_partial_shape(1)); \ + } #define UNARY_AUX_METHODS(UNARY_OP) GENERAL_AUX_METHODS(UNARY_OP, UnaryEltwiseTPP, new_args.at(0)) bool EltwiseTPP::is_supported(const std::shared_ptr& node) { - return ov::is_type(node) || - ov::is_type(node) || - ov::is_type(node) || - ov::is_type(node); + return ov::is_type(node) || ov::is_type(node) || + ov::is_type(node) || ov::is_type(node); } bool EltwiseTPP::visit_attributes(AttributeVisitor& visitor) { @@ -46,13 +43,14 @@ BinaryEltwiseTPP::BinaryEltwiseTPP(libxsmm_meltw_binary_type op_type) : EltwiseT ctor_initialize(std::set{0, 1}, std::set{0}); } - - -libxsmm_bitfield BinaryEltwiseTPP::get_broadcasting_flags(const ov::PartialShape& pshape_0, const ov::PartialShape& pshape_1) { - return get_broadcasting_flags(snippets::utils::pshape_to_vdims(pshape_0), snippets::utils::pshape_to_vdims(pshape_1)); +libxsmm_bitfield BinaryEltwiseTPP::get_broadcasting_flags(const ov::PartialShape& pshape_0, + const ov::PartialShape& pshape_1) { + return get_broadcasting_flags(snippets::utils::pshape_to_vdims(pshape_0), + snippets::utils::pshape_to_vdims(pshape_1)); } -libxsmm_bitfield BinaryEltwiseTPP::get_broadcasting_flags(const snippets::VectorDims& shape_0, const snippets::VectorDims& shape_1) { +libxsmm_bitfield BinaryEltwiseTPP::get_broadcasting_flags(const snippets::VectorDims& shape_0, + const snippets::VectorDims& shape_1) { auto get_subshape = [](const snippets::VectorDims& shape) { snippets::VectorDims subshape(2, 1); for (size_t i = 0; i < std::min(subshape.size(), shape.size()); i++) { @@ -63,8 +61,7 @@ libxsmm_bitfield BinaryEltwiseTPP::get_broadcasting_flags(const snippets::Vector snippets::VectorDims subshape_0 = get_subshape(shape_0); snippets::VectorDims subshape_1 = get_subshape(shape_1); - if (snippets::utils::is_dynamic_vdims(subshape_0) || - snippets::utils::is_dynamic_vdims(subshape_1)) + if (snippets::utils::is_dynamic_vdims(subshape_0) || snippets::utils::is_dynamic_vdims(subshape_1)) return LIBXSMM_MELTW_FLAG_BINARY_NONE; if (subshape_0 == subshape_1) { return LIBXSMM_MELTW_FLAG_BINARY_NONE; @@ -99,62 +96,64 @@ UnaryEltwiseTPP::UnaryEltwiseTPP(libxsmm_meltw_unary_type op_type) : EltwiseTPP( } Add::Add(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) -: BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_ADD), ov::op::v1::Add(arg0, arg1, auto_broadcast) { + : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_ADD), + ov::op::v1::Add(arg0, arg1, auto_broadcast) { m_flags = get_broadcasting_flags(arg0.get_partial_shape(), arg1.get_partial_shape()); } BINARY_AUX_METHODS(Add, ov::op::v1::Add) Subtract::Subtract(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) - : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_SUB), ov::op::v1::Subtract(arg0, arg1, auto_broadcast) { + : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_SUB), + ov::op::v1::Subtract(arg0, arg1, auto_broadcast) { m_flags = get_broadcasting_flags(arg0.get_partial_shape(), arg1.get_partial_shape()); } BINARY_AUX_METHODS(Subtract, ov::op::v1::Subtract) Multiply::Multiply(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) - : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_MUL), ov::op::v1::Multiply(arg0, arg1, auto_broadcast) { + : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_MUL), + ov::op::v1::Multiply(arg0, arg1, auto_broadcast) { m_flags = get_broadcasting_flags(arg0.get_partial_shape(), arg1.get_partial_shape()); } BINARY_AUX_METHODS(Multiply, ov::op::v1::Multiply) Divide::Divide(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) - : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_DIV), ov::op::v1::Divide(arg0, arg1, auto_broadcast) { + : BinaryEltwiseTPP(LIBXSMM_MELTW_TYPE_BINARY_DIV), + ov::op::v1::Divide(arg0, arg1, auto_broadcast) { m_flags = get_broadcasting_flags(arg0.get_partial_shape(), arg1.get_partial_shape()); } BINARY_AUX_METHODS(Divide, ov::op::v1::Divide) -Exp::Exp(const Output& arg0) : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_EXP), ov::op::v0::Exp(arg0) { -} +Exp::Exp(const Output& arg0) : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_EXP), ov::op::v0::Exp(arg0) {} UNARY_AUX_METHODS(Exp) -Relu::Relu(const Output& arg0) : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_RELU), ov::op::v0::Relu(arg0) { -} +Relu::Relu(const Output& arg0) : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_RELU), ov::op::v0::Relu(arg0) {} UNARY_AUX_METHODS(Relu) -Reciprocal::Reciprocal(const Output& arg) : - UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_RECIPROCAL), ov::snippets::op::PowerStatic(arg, -1.f) { -} +Reciprocal::Reciprocal(const Output& arg) + : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_RECIPROCAL), + ov::snippets::op::PowerStatic(arg, -1.f) {} UNARY_AUX_METHODS(Reciprocal) -Square::Square(const Output& arg) : - UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_X2), ov::snippets::op::PowerStatic(arg, 2.f) { -} +Square::Square(const Output& arg) + : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_X2), + ov::snippets::op::PowerStatic(arg, 2.f) {} UNARY_AUX_METHODS(Square) -SquareRoot::SquareRoot(const Output& arg) : - UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_SQRT), ov::snippets::op::PowerStatic(arg, 0.5f) { -} +SquareRoot::SquareRoot(const Output& arg) + : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_SQRT), + ov::snippets::op::PowerStatic(arg, 0.5f) {} UNARY_AUX_METHODS(SquareRoot) -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp index a61668c2a04328..7338450ff8257d 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp @@ -4,18 +4,17 @@ #pragma once +#include "descriptor.hpp" #include "modifiers.hpp" #include "openvino/op/add.hpp" -#include "openvino/op/subtract.hpp" -#include "openvino/op/multiply.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/exp.hpp" +#include "openvino/op/multiply.hpp" #include "openvino/op/relu.hpp" +#include "openvino/op/subtract.hpp" #include "snippets/op/powerstatic.hpp" #include "snippets/utils/utils.hpp" -#include "descriptor.hpp" - namespace ov { namespace intel_cpu { namespace tpp { @@ -27,17 +26,20 @@ class EltwiseTPP : public modifier::TensorProcessingPrimitive { public: static bool is_supported(const std::shared_ptr& node); bool visit_attributes(AttributeVisitor& visitor); - virtual OpDescTPP get_op_desc() const = 0; + virtual OpDescTPP get_op_desc() const = 0; }; class BinaryEltwiseTPP : public EltwiseTPP { public: BinaryEltwiseTPP(libxsmm_meltw_binary_type op_type); - OpDescTPP get_op_desc() const override { return OpDescTPP(m_op_type, m_flags); } + OpDescTPP get_op_desc() const override { + return OpDescTPP(m_op_type, m_flags); + } protected: static libxsmm_bitfield get_broadcasting_flags(const ov::PartialShape& pshape_0, const ov::PartialShape& pshape_1); - static libxsmm_bitfield get_broadcasting_flags(const snippets::VectorDims& pshape_0, const snippets::VectorDims& pshape_1); + static libxsmm_bitfield get_broadcasting_flags(const snippets::VectorDims& pshape_0, + const snippets::VectorDims& pshape_1); libxsmm_bitfield m_flags; libxsmm_meltw_binary_type m_op_type; }; @@ -45,7 +47,10 @@ class BinaryEltwiseTPP : public EltwiseTPP { class UnaryEltwiseTPP : public EltwiseTPP { public: UnaryEltwiseTPP(libxsmm_meltw_unary_type op_type); - OpDescTPP get_op_desc() const override { return OpDescTPP(m_op_type); } + OpDescTPP get_op_desc() const override { + return OpDescTPP(m_op_type); + } + private: libxsmm_meltw_unary_type m_op_type; }; @@ -110,7 +115,6 @@ class Reciprocal : public UnaryEltwiseTPP, public ov::snippets::op::PowerStatic bool visit_attributes(AttributeVisitor& visitor) override; }; - class Square : public UnaryEltwiseTPP, public ov::snippets::op::PowerStatic { public: OPENVINO_OP("Square", "TppOpset", snippets::op::PowerStatic); @@ -127,7 +131,7 @@ class SquareRoot : public UnaryEltwiseTPP, public ov::snippets::op::PowerStatic bool visit_attributes(AttributeVisitor& visitor) override; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp index 8a22aa400cc16c..04306ca8f8b6c5 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.cpp @@ -9,9 +9,10 @@ namespace intel_cpu { namespace tpp { namespace op { -EquationTPP::EquationTPP(const OutputVector& arguments, std::vector op_descs) : - modifier::TensorProcessingPrimitive(), ov::op::Op(arguments), - m_op_descs(std::move(op_descs)) { +EquationTPP::EquationTPP(const OutputVector& arguments, std::vector op_descs) + : modifier::TensorProcessingPrimitive(), + ov::op::Op(arguments), + m_op_descs(std::move(op_descs)) { // Initialize input/output ports as memory access ports std::set ma_iport_idx; for (size_t i = 0; i < get_input_size(); i++) @@ -43,13 +44,14 @@ void EquationTPP::validate_and_infer_types() { for (size_t i = 1; i < get_input_size(); i++) { OPENVINO_ASSERT(element::Type::merge(etype, etype, get_input_element_type(i)), "Incompatible element types in TPP equation"); - OPENVINO_ASSERT(ov::PartialShape::broadcast_merge_into(shape, get_input_partial_shape(i), ov::op::AutoBroadcastType::NUMPY), - "Incompatible element types in TPP equation"); + OPENVINO_ASSERT( + ov::PartialShape::broadcast_merge_into(shape, get_input_partial_shape(i), ov::op::AutoBroadcastType::NUMPY), + "Incompatible element types in TPP equation"); } set_output_type(0, etype, shape); } -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp index 4ba53393336ad4..bf16f149b415de 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp @@ -4,9 +4,9 @@ #pragma once +#include "descriptor.hpp" #include "modifiers.hpp" #include "openvino/op/op.hpp" -#include "descriptor.hpp" namespace ov { namespace intel_cpu { @@ -20,13 +20,15 @@ class EquationTPP : public modifier::TensorProcessingPrimitive, public ov::op::O std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; - const std::vector& get_op_descs() { return m_op_descs; } + const std::vector& get_op_descs() { + return m_op_descs; + } private: std::vector m_op_descs; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp index 3fdcd30e7c4eb6..e0e890a347a026 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.cpp @@ -3,10 +3,11 @@ // #include "factory.hpp" + #include "eltwise.hpp" -#include "reduce.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "ov_ops/type_relaxed.hpp" +#include "reduce.hpp" namespace ov { namespace intel_cpu { @@ -38,37 +39,39 @@ struct CustomPowerStaticBuilder : public NodeFactory::TPPCustomBuilder { } }; -} // namespace -#define CREATE_UNARY_TPP_NODE(tpp_node_type) \ - [](const std::shared_ptr& node) -> std::shared_ptr { \ +} // namespace +#define CREATE_UNARY_TPP_NODE(tpp_node_type) \ + [](const std::shared_ptr& node) -> std::shared_ptr { \ return std::make_shared(node->get_input_source_output(0)); \ } -#define CREATE_BINARY_TPP_NODE(tpp_node_type) \ - [](const std::shared_ptr& node) -> std::shared_ptr { \ - return std::make_shared(node->get_input_source_output(0), node->get_input_source_output(1), node->get_autob()); \ +#define CREATE_BINARY_TPP_NODE(tpp_node_type) \ + [](const std::shared_ptr& node) -> std::shared_ptr { \ + return std::make_shared(node->get_input_source_output(0), \ + node->get_input_source_output(1), \ + node->get_autob()); \ } -#define CREATE_REDUCE_TPP_NODE(tpp_node_type) \ - [](const std::shared_ptr& node) -> std::shared_ptr { \ - const auto& reduce = ov::as_type_ptr(node); \ - OPENVINO_ASSERT(reduce, "Attempt to create TPP Reduce from invalid node"); \ +#define CREATE_REDUCE_TPP_NODE(tpp_node_type) \ + [](const std::shared_ptr& node) -> std::shared_ptr { \ + const auto& reduce = ov::as_type_ptr(node); \ + OPENVINO_ASSERT(reduce, "Attempt to create TPP Reduce from invalid node"); \ return std::make_shared(reduce->get_input_source_output(0), reduce->get_axis()); \ } -std::unordered_map NodeFactory::m_direct_mapping { +std::unordered_map NodeFactory::m_direct_mapping{ {ov::op::v1::Add::get_type_info_static(), CREATE_BINARY_TPP_NODE(Add)}, {ov::op::v1::Subtract::get_type_info_static(), CREATE_BINARY_TPP_NODE(Subtract)}, {ov::op::v1::Multiply::get_type_info_static(), CREATE_BINARY_TPP_NODE(Multiply)}, {ov::op::v1::Divide::get_type_info_static(), CREATE_BINARY_TPP_NODE(Divide)}, {ov::op::v0::Exp::get_type_info_static(), CREATE_UNARY_TPP_NODE(Exp)}, {ov::op::v0::Relu::get_type_info_static(), CREATE_UNARY_TPP_NODE(Relu)}, - // Note that we don't support conversion from ngraph ops here, since they have a broader semantics (e.g. multiple axis provided at a secont input) + // Note that we don't support conversion from ngraph ops here, since they have a broader semantics (e.g. multiple + // axis provided at a secont input) {ov::snippets::op::ReduceMax::get_type_info_static(), CREATE_REDUCE_TPP_NODE(ReduceMax)}, {ov::snippets::op::ReduceSum::get_type_info_static(), CREATE_REDUCE_TPP_NODE(ReduceSum)}, }; - std::vector NodeFactory::m_custom_mapping{CustomPowerStaticBuilder()}; std::shared_ptr NodeFactory::create(const std::shared_ptr& n) { @@ -95,13 +98,16 @@ bool NodeFactory::is_supported(const std::shared_ptr& n) { // Note: verify that TypeRelaxed property is maintained (mismatched input precisions) // after low precisions are enabled (ticket: 132328) const auto& ins = n->inputs(); - auto is_fp32_input = [](const ov::Input& in){ return in.get_element_type() == element::f32; }; + auto is_fp32_input = [](const ov::Input& in) { + return in.get_element_type() == element::f32; + }; const bool all_inputs_fp32 = std::all_of(ins.begin(), ins.end(), is_fp32_input); return (m_direct_mapping.count(n->get_type_info()) || - std::any_of(m_custom_mapping.begin(), m_custom_mapping.end(), matches)) && all_inputs_fp32; + std::any_of(m_custom_mapping.begin(), m_custom_mapping.end(), matches)) && + all_inputs_fp32; } -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.hpp index b3090ebdec47e2..9cfcc2f6226205 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/factory.hpp @@ -4,8 +4,8 @@ #pragma once -#include "openvino/op/op.hpp" #include "openvino/core/type.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace intel_cpu { @@ -21,11 +21,12 @@ class NodeFactory { tpp_matcher matcher; tpp_builder builder; }; + private: static std::unordered_map m_direct_mapping; static std::vector m_custom_mapping; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp index 507276a1c2c898..d9ecc3629f2430 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp @@ -16,19 +16,19 @@ namespace modifier { * @ingroup snippets */ class TensorProcessingPrimitive : virtual public snippets::modifier::MemoryAccess { - public: - void clone_memory_access_ports(const TensorProcessingPrimitive& other) { - m_input_ports = other.m_input_ports; - m_output_ports = other.m_output_ports; - } - static bool visit_attributes(AttributeVisitor& visitor) { - std::string modifier{"TPP"}; - visitor.on_attribute("modifier", modifier); - return true; - } +public: + void clone_memory_access_ports(const TensorProcessingPrimitive& other) { + m_input_ports = other.m_input_ports; + m_output_ports = other.m_output_ports; + } + static bool visit_attributes(AttributeVisitor& visitor) { + std::string modifier{"TPP"}; + visitor.on_attribute("modifier", modifier); + return true; + } }; -} // namespace modifier -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace modifier +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.cpp index 63119623856bc7..11fc73b949a55c 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.cpp @@ -10,8 +10,8 @@ namespace tpp { namespace op { ReduceMax::ReduceMax(const Output& arg, size_t axis) - : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_MAX), ov::snippets::op::ReduceMax(arg, axis) { -} + : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_MAX), + ov::snippets::op::ReduceMax(arg, axis) {} std::shared_ptr ReduceMax::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); @@ -26,8 +26,8 @@ bool ReduceMax::visit_attributes(AttributeVisitor& visitor) { } ReduceSum::ReduceSum(const Output& arg, size_t axis) - : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_ADD), ov::snippets::op::ReduceSum(arg, axis) { -} + : UnaryEltwiseTPP(LIBXSMM_MELTW_TYPE_UNARY_REDUCE_X_OP_ADD), + ov::snippets::op::ReduceSum(arg, axis) {} std::shared_ptr ReduceSum::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); @@ -41,7 +41,7 @@ bool ReduceSum::visit_attributes(AttributeVisitor& visitor) { return UnaryEltwiseTPP::visit_attributes(visitor); } -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp index 9542c4ec90b0b6..f66e913f85b6e7 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp @@ -4,12 +4,10 @@ #pragma once -#include "modifiers.hpp" #include "eltwise.hpp" -#include "snippets/op/reduce.hpp" - - #include "libxsmm_typedefs.h" +#include "modifiers.hpp" +#include "snippets/op/reduce.hpp" namespace ov { namespace intel_cpu { @@ -24,6 +22,7 @@ class ReduceMax : public UnaryEltwiseTPP, public ov::snippets::op::ReduceMax { ReduceMax(const Output& arg, size_t axis); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool visit_attributes(AttributeVisitor& visitor) override; + private: libxsmm_meltw_binary_type m_op_type; }; @@ -34,11 +33,12 @@ class ReduceSum : public UnaryEltwiseTPP, public ov::snippets::op::ReduceSum { ReduceSum(const Output& arg, size_t axis); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool visit_attributes(AttributeVisitor& visitor) override; + private: libxsmm_meltw_binary_type m_op_type; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp index 566a2a5afde658..98a107380aa7d4 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp @@ -3,6 +3,7 @@ // #include "scalar.hpp" + #include "modifiers.hpp" namespace ov { @@ -21,11 +22,11 @@ std::shared_ptr Scalar::clone_with_new_inputs(const OutputVector& new_args bool Scalar::visit_attributes(AttributeVisitor& visitor) { modifier::TensorProcessingPrimitive::visit_attributes(visitor); - return snippets::op::Scalar::visit_attributes(visitor);; + return snippets::op::Scalar::visit_attributes(visitor); + ; } - -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp index f9578c20fb13f5..c619d7b6ab1937 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp @@ -4,8 +4,8 @@ #pragma once -#include "modifiers.hpp" #include "eltwise.hpp" +#include "modifiers.hpp" #include "snippets/op/reduce.hpp" namespace ov { @@ -26,7 +26,7 @@ class Scalar : public ov::snippets::op::Scalar { bool visit_attributes(AttributeVisitor& visitor) override; }; -} // namespace op -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace op +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp index 53992b1e67da9c..571e292104d132 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp @@ -2,22 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" - #include "brgemm_to_brgemm_tpp.hpp" -#include "snippets/utils/utils.hpp" -#include "snippets/op/brgemm.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" - +#include "cpu_shape.h" #include "openvino/core/rt_info.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/pass/pattern/matcher.hpp" - -#include "cpu_shape.h" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" +#include "snippets/op/brgemm.hpp" +#include "snippets/utils/utils.hpp" +#include "transformations/tpp/x64/op/brgemm.hpp" #include "utils/general_utils.h" - namespace ov { namespace intel_cpu { namespace tpp { @@ -27,13 +23,15 @@ using namespace snippets::lowered; bool BrgemmToBrgemmTPP::is_supported_brgemm_configuration(const std::vector>& layouts, const ov::element::TypeVector& precisions) { - OPENVINO_ASSERT(layouts.size() == 3 && precisions.size() == 3, "snippets::op::Brgemm must have 2 inputs and 1 output"); + OPENVINO_ASSERT(layouts.size() == 3 && precisions.size() == 3, + "snippets::op::Brgemm must have 2 inputs and 1 output"); const bool supported_layouts = std::all_of(layouts.begin(), layouts.end(), [](const std::vector& layout) { return layout.empty() || layout.back() == layout.size() - 1; }); - const bool supported_precisions = std::all_of(precisions.begin(), precisions.end(), [](const ov::element::Type& et) { - return et == ov::element::f32; - }); + const bool supported_precisions = + std::all_of(precisions.begin(), precisions.end(), [](const ov::element::Type& et) { + return et == ov::element::f32; + }); return supported_layouts && supported_precisions; } @@ -79,17 +77,28 @@ BrgemmToBrgemmTPP::BrgemmToBrgemmTPP() { if (precision_a == ov::element::f32) { brgemm_tpp = std::make_shared(brgemm->input_value(0), brgemm->input_value(1), - offset_a, offset_b, offset_c, - layout_a, layout_b, layout_c); + offset_a, + offset_b, + offset_c, + layout_a, + layout_b, + layout_c); } OPENVINO_ASSERT(brgemm_tpp, "Failed to create BrgemmTPP node in the BrgemmToBrgemmTPP pass"); brgemm_tpp->set_friendly_name(brgemm->get_friendly_name()); ov::replace_node(brgemm, brgemm_tpp); - // Set FULL_DIM tensors on ports to avoid automatic loop markup (blocked loops will be inserted in a separate transformation) - PortDescriptorUtils::set_port_descriptor(brgemm_tpp->input(0), brgemm_in0_desc->get_subtensor(), brgemm_in0_desc->get_layout()); - PortDescriptorUtils::set_port_descriptor(brgemm_tpp->input(1), brgemm_in1_desc->get_subtensor(), brgemm_in1_desc->get_layout()); - PortDescriptorUtils::set_port_descriptor(brgemm_tpp->output(0), brgemm_out_desc->get_subtensor(), brgemm_out_desc->get_layout()); + // Set FULL_DIM tensors on ports to avoid automatic loop markup (blocked loops will be inserted in a separate + // transformation) + PortDescriptorUtils::set_port_descriptor(brgemm_tpp->input(0), + brgemm_in0_desc->get_subtensor(), + brgemm_in0_desc->get_layout()); + PortDescriptorUtils::set_port_descriptor(brgemm_tpp->input(1), + brgemm_in1_desc->get_subtensor(), + brgemm_in1_desc->get_layout()); + PortDescriptorUtils::set_port_descriptor(brgemm_tpp->output(0), + brgemm_out_desc->get_subtensor(), + brgemm_out_desc->get_layout()); // need to run validate_and_infer_types manually: either input shapes were updated or // output Layout was updated (out shape will be updated in validate_and_infer_types()) @@ -101,7 +110,7 @@ BrgemmToBrgemmTPP::BrgemmToBrgemmTPP() { auto m = std::make_shared(m_brgemm, matcher_name); register_matcher(m, callback); } -} // namespace pass -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp index 2b73104d1e1335..6e1d9f110c6aec 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp @@ -16,7 +16,7 @@ namespace pass { * @brief Converts Snippets Brgemm to BrgemmTPP operation, if possible. Only fp32 Brgemms are currently converted. * @ingroup snippets */ -class BrgemmToBrgemmTPP: public ov::pass::MatcherPass { +class BrgemmToBrgemmTPP : public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("BrgemmToBrgemmTPP"); BrgemmToBrgemmTPP(); @@ -25,7 +25,6 @@ class BrgemmToBrgemmTPP: public ov::pass::MatcherPass { const ov::element::TypeVector& precisions); }; - } // namespace pass } // namespace tpp } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp index da83038f5455f8..63dd44ca133fa0 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp @@ -2,17 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" -#include "snippets/utils/utils.hpp" #include "eltwise_to_eltwise_tpp.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" - -#include "transformations/tpp/x64/op/factory.hpp" #include "openvino/op/util/binary_elementwise_arithmetic.hpp" #include "openvino/op/util/unary_elementwise_arithmetic.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" #include "snippets/lowered/port_descriptor.hpp" #include "snippets/op/reduce.hpp" +#include "snippets/utils/utils.hpp" +#include "transformations/tpp/x64/op/factory.hpp" namespace ov { namespace intel_cpu { @@ -29,7 +28,6 @@ EltwiseToEltwiseTPP::EltwiseToEltwiseTPP() { ov::op::util::BinaryElementwiseArithmetic, ov::snippets::op::ReduceBase>(is_supported_by_tpp); - auto callback = [=](ov::pass::pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::EltwiseToEltwiseTPP") const auto node = m.get_match_root(); @@ -41,7 +39,8 @@ EltwiseToEltwiseTPP::EltwiseToEltwiseTPP() { OPENVINO_ASSERT(tpp_eltwise, "Failed to create TPP node"); const size_t M_block = 32; - const size_t N_block = ov::is_type(node) ? ov::snippets::utils::get_full_dim_value() : 64; + const size_t N_block = + ov::is_type(node) ? ov::snippets::utils::get_full_dim_value() : 64; ov::replace_node_update_name(node, tpp_eltwise); for (size_t i = 0; i < node->get_input_size(); i++) ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->input(i), {M_block, N_block}); @@ -54,7 +53,7 @@ EltwiseToEltwiseTPP::EltwiseToEltwiseTPP() { auto m = std::make_shared(supported_eltwise, matcher_name); register_matcher(m, callback); } -} // namespace pass -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp index f0bdab120c3498..0b68074c657c15 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp @@ -16,13 +16,12 @@ namespace pass { * @brief Converts elementwise operations supported by the TPP backend to the dedicated TPP opset * @ingroup snippets */ -class EltwiseToEltwiseTPP: public ov::pass::MatcherPass { +class EltwiseToEltwiseTPP : public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("EltwiseToEltwiseTPP"); EltwiseToEltwiseTPP(); }; - } // namespace pass } // namespace tpp } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp index 885ff753843588..b64522154adc9e 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.cpp @@ -3,25 +3,25 @@ // #include "fuse_tpp_to_equations.hpp" -#include "transformations/tpp/x64/op/eltwise.hpp" -#include "transformations/tpp/x64/op/equation.hpp" -#include "snippets/utils/utils.hpp" #include "snippets/itt.hpp" #include "snippets/lowered/port_descriptor.hpp" +#include "snippets/utils/utils.hpp" +#include "transformations/tpp/x64/op/eltwise.hpp" +#include "transformations/tpp/x64/op/equation.hpp" namespace ov { namespace intel_cpu { namespace tpp { namespace pass { -using snippets::lowered::ExpressionPtr; using snippets::lowered::ExpressionPort; +using snippets::lowered::ExpressionPtr; using NodePtr = std::shared_ptr; bool FuseTPPToEquations::fuse_from_root(const NodePtr& root, const std::shared_ptr& m) { using snippets::lowered::PortDescriptorUtils; OutputVector eq_ivals; std::vector op_descs; - std::unordered_map node_replace_map; + std::unordered_map node_replace_map; // Only ops with one out are supported due to Equations restrictions auto supported_num_out = [](const Output& out) { const auto& n = out.get_node_shared_ptr(); @@ -30,10 +30,10 @@ bool FuseTPPToEquations::fuse_from_root(const NodePtr& root, const std::shared_p auto get_tpp_op = [](const NodePtr& n) { auto tpp = std::dynamic_pointer_cast(n); bool not_supported_op = - // ticket: 152532 - ov::is_type(n) || - // ticket: 152510 - ov::is_type(n); + // ticket: 152532 + ov::is_type(n) || + // ticket: 152510 + ov::is_type(n); return not_supported_op ? nullptr : tpp; }; @@ -78,7 +78,6 @@ bool FuseTPPToEquations::fuse_from_root(const NodePtr& root, const std::shared_p } } - auto equation = std::make_shared(eq_ivals, op_descs); for (auto& kv : node_replace_map) @@ -110,8 +109,7 @@ bool FuseTPPToEquations::run_on_model(const std::shared_ptr& m) { return modified; } - -} // namespace pass -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp index a99330845d443d..326766d000f69a 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp @@ -16,11 +16,12 @@ namespace pass { * @brief Converts a group of elementwise operations into a fused TPP Equation node * @ingroup snippets */ -class FuseTPPToEquations: public ov::pass::ModelPass { +class FuseTPPToEquations : public ov::pass::ModelPass { public: OPENVINO_MODEL_PASS_RTTI("FuseTPPToEquations"); FuseTPPToEquations() = default; bool run_on_model(const std::shared_ptr& m) override; + private: static bool fuse_from_root(const std::shared_ptr&, const std::shared_ptr& m); }; diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp index fa545c26dbb53e..d9485b1c6b7b9d 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp @@ -11,7 +11,6 @@ #include "snippets/utils/utils.hpp" #include "transformations/tpp/x64/op/brgemm.hpp" - namespace ov { namespace intel_cpu { namespace tpp { @@ -28,28 +27,35 @@ bool BrgemmTPPBlocking::SetBrgemmBeta::run(ov::snippets::lowered::LinearIR& line return true; } -std::shared_ptr BrgemmTPPBlocking::SetBrgemmBeta::merge(const std::shared_ptr& other) { +std::shared_ptr BrgemmTPPBlocking::SetBrgemmBeta::merge( + const std::shared_ptr& other) { return !other || ov::is_type(other) ? std::make_shared() : nullptr; } -std::tuple BrgemmTPPBlocking::get_blocking_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const { +std::tuple BrgemmTPPBlocking::get_blocking_params( + const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const { size_t m, n, k; std::tie(m, n, k) = get_brgemm_dimensions(brgemm_expr); - OPENVINO_ASSERT(!is_dynamic_value(m) && !is_dynamic_value(n) && !is_dynamic_value(n), "BrgemmTPP doesn't support dynamic shapes"); + OPENVINO_ASSERT(!is_dynamic_value(m) && !is_dynamic_value(n) && !is_dynamic_value(n), + "BrgemmTPP doesn't support dynamic shapes"); size_t m_blk, n_blk, k_blk; std::tie(m_blk, n_blk, k_blk) = BrgemmBlockingBase::get_blocking_params(brgemm_expr); - auto get_projected_blk = [](const size_t dim, const size_t blk) { return ov::snippets::utils::is_full_dim_value(blk) ? dim : blk; }; + auto get_projected_blk = [](const size_t dim, const size_t blk) { + return ov::snippets::utils::is_full_dim_value(blk) ? dim : blk; + }; return std::make_tuple(get_projected_blk(m, m_blk), get_projected_blk(n, n_blk), get_projected_blk(k, k_blk)); } -ov::snippets::lowered::SpecificIterationHandlers BrgemmTPPBlocking::get_k_loop_handlers(size_t work_amount, size_t block_size) const { - ov::snippets::lowered::SpecificIterationHandlers handlers = ov::snippets::lowered::pass::BrgemmBlockingBase::get_k_loop_handlers(work_amount, block_size); +ov::snippets::lowered::SpecificIterationHandlers BrgemmTPPBlocking::get_k_loop_handlers(size_t work_amount, + size_t block_size) const { + ov::snippets::lowered::SpecificIterationHandlers handlers = + ov::snippets::lowered::pass::BrgemmBlockingBase::get_k_loop_handlers(work_amount, block_size); handlers.register_pass(); return handlers; } -} // namespace pass -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp index 908d12087175aa..31f4bfeadc8979 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp @@ -36,12 +36,15 @@ class BrgemmTPPBlocking : public ov::snippets::lowered::pass::BrgemmBlocking merge(const std::shared_ptr& other) override; + std::shared_ptr merge( + const std::shared_ptr& other) override; }; private: - std::tuple get_blocking_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const override; - ov::snippets::lowered::SpecificIterationHandlers get_k_loop_handlers(size_t work_amount, size_t block_size) const override; + std::tuple get_blocking_params( + const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const override; + ov::snippets::lowered::SpecificIterationHandlers get_k_loop_handlers(size_t work_amount, + size_t block_size) const override; }; } // namespace pass diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp index dcd97fdd74b638..42c30bb112263c 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp @@ -2,13 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" -#include "snippets/op/buffer.hpp" -#include "transformations/tpp/x64/op/modifiers.hpp" #include "set_tpp_leading_dim.hpp" -#include "snippets/op/brgemm.hpp" + +#include "snippets/itt.hpp" #include "snippets/lowered/loop_manager.hpp" +#include "snippets/op/brgemm.hpp" +#include "snippets/op/buffer.hpp" #include "snippets/utils/utils.hpp" +#include "transformations/tpp/x64/op/modifiers.hpp" namespace ov { namespace intel_cpu { @@ -24,7 +25,7 @@ using LoopPort = snippets::lowered::LoopPort; bool has_directly_connected_buffer(const ExpressionPort& port, const snippets::lowered::LoopManagerPtr& loop_mngr) { auto accepted_loops = [&loop_mngr, &port](const std::vector& orig, const std::vector& connect) { size_t connect_idx = 0; - auto pred = [&port](const LoopPort& loop_port ) { + auto pred = [&port](const LoopPort& loop_port) { return *loop_port.get_expr_port() == port; }; for (const auto orig_loop : orig) { @@ -36,9 +37,8 @@ bool has_directly_connected_buffer(const ExpressionPort& port, const snippets::l // as long as the port is the loop entry/exit, and it is not incremented. // This is the case for Brgemm K-blocking loops, for example. const auto loop_info = loop_mngr->get_loop_info(orig_loop); - const auto& border_points = port.get_type() == ExpressionPort::Type::Input ? - loop_info->get_input_ports() : - loop_info->get_output_ports(); + const auto& border_points = port.get_type() == ExpressionPort::Type::Input ? loop_info->get_input_ports() + : loop_info->get_output_ports(); const auto& found = std::find_if(border_points.begin(), border_points.end(), pred); if (found == border_points.end() || found->is_incremented()) return false; @@ -87,36 +87,35 @@ size_t get_leading_dim(ExpressionPort port, const snippets::lowered::LoopManager } OPENVINO_ASSERT(layout.empty() || (layout.back() == layout.size() - 1 && layout.size() == shape.size()), - "get_leading_dim detected invalid layout values: check shape + layout combination"); + "get_leading_dim detected invalid layout values: check shape + layout combination"); const auto dim = [&]() -> size_t { - switch (port.get_type()) { - // Input shape is original, so we need to correctly read this data by order - // Example: - // Original shape (shape) = [1, 49, 2, 23] - // Layout (transpose order) = [2, 0, 1, 3] - // Transposed shape = [2, 1, 49, 23] - // The leading dimension is equal to stride of shape[layout[3]] = 2 x 23 - case ExpressionPort::Type::Input : - return snippets::utils::get_input_dim_idx(layout, 1); // `1` in example - // Output shape is already transposed, we need to correctly write the data with original shape by the order - // Example: - // Original transposed shape (shape) = [49, 2, 7, 39] - // Layout (transpose order) = [2, 0, 1, 3] - // Before leading dimension with index 3 there is dimension with index 2 in planar layout. - // Since we have non-planar layout, we have to find this before LD dim in transposed order. - // In layout 2nd idx is first element, it means, that the leading dimension is equal to stride of shape[0] - case ExpressionPort::Type::Output : - return snippets::utils::get_output_dim_idx(layout, 1); // 0 in the example: shape[0] = 49 - default: - OPENVINO_THROW("Unsupported Expression port type"); + switch (port.get_type()) { + // Input shape is original, so we need to correctly read this data by order + // Example: + // Original shape (shape) = [1, 49, 2, 23] + // Layout (transpose order) = [2, 0, 1, 3] + // Transposed shape = [2, 1, 49, 23] + // The leading dimension is equal to stride of shape[layout[3]] = 2 x 23 + case ExpressionPort::Type::Input: + return snippets::utils::get_input_dim_idx(layout, 1); // `1` in example + // Output shape is already transposed, we need to correctly write the data with original shape by the order + // Example: + // Original transposed shape (shape) = [49, 2, 7, 39] + // Layout (transpose order) = [2, 0, 1, 3] + // Before leading dimension with index 3 there is dimension with index 2 in planar layout. + // Since we have non-planar layout, we have to find this before LD dim in transposed order. + // In layout 2nd idx is first element, it means, that the leading dimension is equal to stride of shape[0] + case ExpressionPort::Type::Output: + return snippets::utils::get_output_dim_idx(layout, 1); // 0 in the example: shape[0] = 49 + default: + OPENVINO_THROW("Unsupported Expression port type"); } }; - return layout.size() == 1 ? - shape.back() : - std::accumulate(shape.cbegin() + dim() + 1, shape.cend(), 1, std::multiplies()); + return layout.size() == 1 ? shape.back() + : std::accumulate(shape.cbegin() + dim() + 1, shape.cend(), 1, std::multiplies()); } -} // namespace +} // namespace SetTPPLeadingDim::SetTPPLeadingDim() : RangedPass() {} @@ -151,8 +150,7 @@ bool SetTPPLeadingDim::run(snippets::lowered::LinearIR& linear_ir, return modified; } - -} // namespace pass -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp index d755e4813dde8e..6be200c30b7c1c 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp @@ -15,10 +15,11 @@ namespace pass { * @interface SetTPPLeadingDim * @brief TPP leading dimension depends on the operation it is connected to. If it's a Parameter or Result * we can compute LD based on shape, if it's a Buffer - we need to consider allocation shape. - * This transformation should be performed before InsertTailLoop because it may change graph connectivity for 1st and last iterations. + * This transformation should be performed before InsertTailLoop because it may change graph connectivity for 1st and + * last iterations. * @ingroup snippets */ -class SetTPPLeadingDim: public snippets::lowered::pass::RangedPass { +class SetTPPLeadingDim : public snippets::lowered::pass::RangedPass { public: OPENVINO_RTTI("SetTPPLeadingDim", "0", snippets::lowered::pass::RangedPass); SetTPPLeadingDim(); diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp index 0b9f41d47aa0da..06ca575f314b4b 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" #include "scalar_to_scalar_tpp.hpp" + #include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" +#include "snippets/lowered/port_connector.hpp" #include "snippets/op/scalar.hpp" -#include "transformations/tpp/x64/op/scalar.hpp" #include "transformations/tpp/x64/op/modifiers.hpp" -#include "snippets/lowered/port_connector.hpp" - +#include "transformations/tpp/x64/op/scalar.hpp" namespace ov { namespace intel_cpu { @@ -21,7 +21,6 @@ ScalarToScalarTPP::ScalarToScalarTPP() { auto snippets_scalar = ov::pass::pattern::wrap_type(); - auto callback = [=](ov::pass::pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::ScalarToScalarTPP") const auto node = ov::as_type_ptr(m.get_match_root()); @@ -51,7 +50,7 @@ ScalarToScalarTPP::ScalarToScalarTPP() { auto m = std::make_shared(snippets_scalar, matcher_name); register_matcher(m, callback); } -} // namespace pass -} // namespace tpp -} // namespace intel_cpu -} // namespace ov +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp index a56e23363067e2..2a7e712ab1baea 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp @@ -16,13 +16,12 @@ namespace pass { * @brief Converts snippets::op::Scalar to tpp::op::Scalar, since TPP operations require a dedicated emitter * @ingroup snippets */ -class ScalarToScalarTPP: public ov::pass::MatcherPass { +class ScalarToScalarTPP : public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("ScalarToScalarTPP"); ScalarToScalarTPP(); }; - } // namespace pass } // namespace tpp } // namespace intel_cpu