// Patch summary: adds an aarch64 JIT emitter class `jit_soft_plus_emitter` and registers it for
// Algorithm::EltwiseSoftPlus.
//
// Files touched by this diff:
//   * emitters/plugin/aarch64/jit_eltwise_emitters.cpp  - emitter implementation.
//   * emitters/plugin/aarch64/jit_eltwise_emitters.hpp  - class declaration (derives from jit_emitter).
//   * nodes/executors/aarch64/jit_eltwise.cpp           - adds EltwiseSoftPlus to the list in
//                                                         JitEltwiseExecutor::isSupported.
//   * nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp - adds an OV_CASE for EltwiseSoftPlus in
//                                                         create_eltwise_emitter and in
//                                                         eltwise_precision_helper::get_supported_precisions.
//   * tests/.../classes/activation.cpp                  - adds ActivationTypes::SoftPlus to the condition in
//                                                         ActivationLayerCPUTest::getPrimitiveType.
//
// What the new emitter does, as written in this diff:
//   * Both constructors call prepare_table() and then create `exp_emitter`.
//   * get_inputs_count() returns 1; get_aux_vecs_count() is the exp emitter's count + 2;
//     get_aux_gprs_count() is the exp emitter's count + 1.
//   * emit_impl() calls emit_isa() only when host_isa_ == asimd, otherwise it throws
//     "Can't create jit eltwise kernel".
//   * emit_isa() throws for any exec_prc_ other than f32. It runs exp_emitter->emit_code() from the source
//     register into out_vec_idxs (forwarding the whole aux_vec_idxs / aux_gpr_idxs lists), adds the table
//     constant "one" to dst, and then works on two scratch vectors taken from aux_vec_idxs at positions
//     exp_emitter->get_aux_vecs_count() and exp_emitter->get_aux_vecs_count() + 1.
//   * register_table_entries() registers exactly three keys: "one", "threshold" and "ln2f".
//   * get_supported_precisions() returns {{element::f32}}.
//
// NOTE(review): emit_isa() loads table key "bit_count", but register_table_entries() in this diff does not
//               register that key - confirm where it is defined.
// NOTE(review): the "threshold" entry is registered but is not read anywhere in emit_isa() as shown.
// NOTE(review): the last instruction is h->fsub(dst.s, dst.s) with two operands, whereas the earlier fsub
//               call passes three; the n*ln2 value computed into aux2 is not consumed afterwards. The
//               computation looks unfinished - confirm the intended final steps.
// NOTE(review): the "one" constant is loaded via table_val2(...) while "bit_count" and "ln2f" use
//               table_val(...) - confirm which helper is the right ld1r operand.
// NOTE(review): template argument lists (e.g. `std::set>`, `std::make_unique(h, ...)`, bare `template`)
//               and line breaks appear to have been lost in this copy of the patch - presumably an
//               extraction artifact; verify against the original change before applying.
diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 05a0e0a2cf6a0e..fa8a445ffb9918 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -2255,6 +2255,75 @@ std::set> jit_sigmoid_emitter::get_supported_precisio return {{element::f32}}; } +/// SOFT_PLUS /// +jit_soft_plus_emitter::jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); + exp_emitter = std::make_unique(h, host_isa, exec_prc); +} + +jit_soft_plus_emitter::jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); + exp_emitter = std::make_unique(h, host_isa, exec_prc); +} + +size_t jit_soft_plus_emitter::get_inputs_count() const { return 1; } + +size_t jit_soft_plus_emitter::get_aux_vecs_count() const { return exp_emitter->get_aux_vecs_count() + 2; } + +size_t jit_soft_plus_emitter::get_aux_gprs_count() const { return exp_emitter->get_aux_gprs_count() + 1; } + +void jit_soft_plus_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OPENVINO_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_soft_plus_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (exec_prc_ != ov::element::f32) { + OPENVINO_THROW("unsupported precision: " + exec_prc_.to_string()); + } + + using TReg = typename 
dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src(in_vec_idxs[0]); + const TReg dst(out_vec_idxs[0]); + const TReg aux1(aux_vec_idxs[exp_emitter->get_aux_vecs_count()]); + const TReg aux2(aux_vec_idxs[exp_emitter->get_aux_vecs_count() + 1]); + + exp_emitter->emit_code( + { src.getIdx() }, + out_vec_idxs, + aux_vec_idxs, + aux_gpr_idxs); + h->ld1r(aux1.s, table_val2("one")); + h->fadd(dst.s, dst.s, aux1.s); + h->fcvtzs(aux2.s, dst.s); + h->cls(aux1.s, aux2.s); + h->ld1r(aux2.s, table_val("bit_count")); + h->fsub(aux1.s, aux2.s, aux1.s); + // aux1.s contains nearest power of 2 for e^x + 1 + h->ld1r(aux2.s, table_val("ln2f")); + h->fmul(aux2.s, aux1.s, aux2.s); // Computed n*ln2 in aux2.s + h->fsub(dst.s, dst.s); +} + +void jit_soft_plus_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); + push_arg_entry_of("threshold", 0x41a00000, true); // Threshold set to 20 + push_arg_entry_of("ln2f", 0x3f317218, true); // Natural log of 2 +} + +std::set> jit_soft_plus_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + /// SOFT_SIGN /// jit_soft_sign_emitter::jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index be4e51cd0b759d..3278dcdb4a0ccb 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -956,6 +956,35 @@ class jit_sigmoid_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_soft_plus_emitter : public jit_emitter { +public: + jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const 
ov::element::Type exec_prc = ov::element::f32); + + jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + void register_table_entries() override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + std::unique_ptr exp_emitter; + + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + class jit_soft_sign_emitter : public jit_emitter { public: jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 912fe23fcd1fcf..4d578614171b6c 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -54,6 +54,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseRoundHalfToEven, Algorithm::EltwiseSelect, Algorithm::EltwiseSigmoid, + Algorithm::EltwiseSoftPlus, Algorithm::EltwiseSoftSign, Algorithm::EltwiseSqrt, Algorithm::EltwiseSubtract, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index b3fe7018d23677..de002898d56c52 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -676,6 +676,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseRoundHalfToEven, ov::intel_cpu::aarch64::jit_round_half_to_even_emitter), 
OV_CASE(Algorithm::EltwiseSelect, ov::intel_cpu::aarch64::jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, ov::intel_cpu::aarch64::jit_sigmoid_emitter), + OV_CASE(Algorithm::EltwiseSoftPlus, ov::intel_cpu::aarch64::jit_soft_plus_emitter), OV_CASE(Algorithm::EltwiseSoftSign, ov::intel_cpu::aarch64::jit_soft_sign_emitter), OV_CASE(Algorithm::EltwiseSqrt, ov::intel_cpu::aarch64::jit_sqrt_emitter), OV_CASE(Algorithm::EltwiseSubtract, ov::intel_cpu::aarch64::jit_subtract_emitter), @@ -861,6 +862,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_round_half_to_even_emitter), OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, jit_sigmoid_emitter), + OV_CASE(Algorithm::EltwiseSoftPlus, jit_soft_plus_emitter), OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), OV_CASE(Algorithm::EltwiseSqrt, jit_sqrt_emitter), OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp index 57f098e1f234d2..7c8d0d65692612 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp @@ -193,6 +193,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType (activation_type == utils::ActivationTypes::GeluTanh) || (activation_type == utils::ActivationTypes::Relu) || (activation_type == utils::ActivationTypes::Sigmoid) || + (activation_type == utils::ActivationTypes::SoftPlus) || (activation_type == utils::ActivationTypes::SoftSign) || (activation_type == utils::ActivationTypes::Sqrt) || (activation_type == utils::ActivationTypes::Swish) ||