From c02e2ac19d6f4ffc3775f27ea2553bafe9a8fb10 Mon Sep 17 00:00:00 2001 From: Nashez Zubair <35090095+nashez@users.noreply.github.com> Date: Tue, 26 Nov 2024 14:12:42 +0530 Subject: [PATCH] [CPU][ARM64] Add JIT emitter for Eltwise LogicalAnd operation (#27709) ### Details: * Added the jit_logical_and_emitter class, derived from jit_emitter, in emitters/plugin/aarch64/jit_eltwise_emitters * Registered Algorithm::EltwiseLogicalAnd in both create_eltwise_emitter and get_supported_precisions in nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp * Added Algorithm::EltwiseLogicalAnd to the supported-algorithm list in JitEltwiseExecutor::isSupported (nodes/executors/aarch64/jit_eltwise.cpp) ### Tickets: Closes: #27503 Signed-off-by: Nashez Zubair --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 52 +++++++++++++++++++ .../plugin/aarch64/jit_eltwise_emitters.hpp | 28 ++++++++++ .../nodes/executors/aarch64/jit_eltwise.cpp | 1 + .../aarch64/jit_uni_eltwise_generic.cpp | 2 + 4 files changed, 83 insertions(+) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 355c8fb7f4c4d7..83cdd252f9bc6f 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -1232,6 +1232,58 @@ std::set> jit_less_equal_emitter::get_supported_preci return {{element::f32, element::f32}}; } +/// LOGICAL_AND /// +jit_logical_and_emitter::jit_logical_and_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); +} + +jit_logical_and_emitter::jit_logical_and_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); +} + +size_t jit_logical_and_emitter::get_inputs_count() const { return 2; } + +size_t 
jit_logical_and_emitter::get_aux_vecs_count() const { return 1; } + +size_t jit_logical_and_emitter::get_aux_gprs_count() const { return 1; } + +void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_logical_and_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src1 = TReg(in_vec_idxs[0]); + const TReg src2 = TReg(in_vec_idxs[1]); + const TReg dst = TReg(out_vec_idxs[0]); + const TReg aux = TReg(aux_vec_idxs[0]); + + h->and_(dst.b16, src1.b16, src2.b16); + h->ld1r(aux.s, table_val2("one")); + h->and_(dst.b16, dst.b16, aux.b16); +} + +void jit_logical_and_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); +} + +std::set> jit_logical_and_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + /// LOGICAL_NOT /// jit_logical_not_emitter::jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index a99e016c9c834a..fa4f4141c388e4 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -579,6 +579,34 @@ class jit_less_equal_emitter : public jit_emitter { void register_table_entries() override; }; +class jit_logical_and_emitter : public jit_emitter { +public: + 
jit_logical_and_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_logical_and_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& n); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + static std::set> get_supported_precisions( + const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + + void register_table_entries() override; +}; + class jit_logical_not_emitter : public jit_emitter { public: jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 6da6b63eb94a72..660db85cd61529 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -36,6 +36,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseIsInf, Algorithm::EltwiseIsNaN, Algorithm::EltwiseLessEqual, + Algorithm::EltwiseLogicalAnd, Algorithm::EltwiseLogicalNot, Algorithm::EltwiseLogicalXor, Algorithm::EltwiseMaximum, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index b7fbfaf16e1587..7ac3b603353541 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -653,6 +653,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte 
OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter), OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter), OV_CASE(Algorithm::EltwiseLessEqual, ov::intel_cpu::aarch64::jit_less_equal_emitter), + OV_CASE(Algorithm::EltwiseLogicalAnd, ov::intel_cpu::aarch64::jit_logical_and_emitter), OV_CASE(Algorithm::EltwiseLogicalNot, ov::intel_cpu::aarch64::jit_logical_not_emitter), OV_CASE(Algorithm::EltwiseLogicalXor, ov::intel_cpu::aarch64::jit_logical_xor_emitter), OV_CASE(Algorithm::EltwiseIsNaN, ov::intel_cpu::aarch64::jit_is_nan_emitter), @@ -839,6 +840,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter), OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter), OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),