Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Signed-off-by: Michal Miotk <[email protected]>
  • Loading branch information
michal-miotk committed Nov 20, 2024
1 parent 2d58b17 commit e1fcc01
Show file tree
Hide file tree
Showing 25 changed files with 246 additions and 541 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct lstm_elt : public RNNParams<lstm_elt> {
const std::vector<activation_additional_params> activation_params = {},
const lstm_weights_order offset_order = lstm_weights_order::iofz,
const uint32_t direction = 0)
: RNNParams(id, x, {}, cell, {}, {}, {}, {}, "", "", clip, input_forget, activations, activation_params, offset_order, \
: RNNParams(id, x, {}, cell, {}, {}, {}, {}, "", "", clip, input_forget, activations, activation_params, offset_order,
direction == 0 ? ov::op::RecurrentSequenceDirection::FORWARD : ov::op::RecurrentSequenceDirection::REVERSE) {
if (!cell.empty())
input.pop_back();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,31 +160,21 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std:
cldnn::program_node& prev, cldnn::program_node& node, size_t i) {
OPENVINO_ASSERT(reorder_params != nullptr, "[GPU] WeightsReorderParams is not initialized.");
std::string reorder_id = input_id + "_reo_" + std::to_string(i);
auto hiddenSize = reorder_params->get_output_layout().get_shape()[1] / 4;
auto inputSize = static_cast<int>(reorder_params->get_output_layout().get_shape()[3]);
const auto dir_num = static_cast<int>(reorder_params->get_input_layout().get_shape()[0]);
auto hiddenSize = reorder_params->get_input_layout().get_shape()[1] / 4;
auto inputSize = static_cast<int>(reorder_params->get_input_layout().get_shape()[2]);
int size_third;
const int W_idx = 3;
if (i == W_idx) {
size_third = inputSize;
} else {
size_third = static_cast<int>(hiddenSize);
}
auto cropSizeR = cldnn::tensor{1, static_cast<int>(hiddenSize), 1, size_third, 1};
cldnn::layout reorder_layout;
if (i == W_idx) {
reorder_layout = reorder_params->get_output_layout();
} else {
reorder_layout = reorder_params->get_output_layout();
auto reorder_layout_new_shape = reorder_layout.get_shape();
reorder_layout_new_shape[3] = hiddenSize;
reorder_layout = reorder_layout.clone_with_other_shape(reorder_layout_new_shape);
}
auto reorder = std::make_shared<cldnn::reorder>(reorder_id, input_id, reorder_layout);
auto& reorder_node = p.get_or_create(reorder);
auto cropSizeR = cldnn::tensor{dir_num, static_cast<int>(hiddenSize), 1, size_third};
std::string crop_id_b = input_id + "_c";
auto get_crop_node = [&](int cropNum) -> cldnn::program_node& {
auto crop_id = primitive_id(crop_id_b + std::to_string(cropNum));
auto crop_prim = std::make_shared<cldnn::crop>(crop_id, reorder_id, cropSizeR, cldnn::tensor{0, static_cast<int>(cropNum*hiddenSize), 0, 0, 0});
auto crop_prim = std::make_shared<cldnn::crop>(crop_id, reorder_id, cropSizeR, cldnn::tensor{0, static_cast<int>(cropNum*hiddenSize), 0, 0});
return p.get_or_create(crop_prim);
};

Expand All @@ -194,19 +184,18 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std:
auto& crop3_node = get_crop_node(3);
std::vector<input_info> con_input{input_info(crop_id_b + "1"), input_info(crop_id_b + "0"), input_info(crop_id_b + "2"), input_info(crop_id_b + "3")};
cldnn::primitive_id concat_id{input_id + "cont"};
auto con = std::make_shared<cldnn::concatenation>(concat_id, con_input, 0);
auto con = std::make_shared<cldnn::concatenation>(concat_id, con_input, 1);
auto& con_node = p.get_or_create(con);
p.add_intermediate(con_node, node, prev, true);
p.add_intermediate(reorder_node, con_node, prev, true);
p.add_intermediate(crop1_node, con_node, reorder_node, true);
p.add_connection(reorder_node, crop0_node, 0);
p.add_connection(reorder_node, crop2_node, 0);
p.add_connection(reorder_node, crop3_node, 0);
p.add_intermediate(crop1_node, con_node, prev, true);
p.add_connection(prev, crop0_node, 0);
p.add_connection(prev, crop2_node, 0);
p.add_connection(prev, crop3_node, 0);
p.add_connection(crop0_node, con_node, 0);
p.add_connection(crop2_node, con_node, 0);
p.add_connection(crop3_node, con_node, 0);
std::string permute_id = input_id + "_perx";
std::vector<uint16_t> ord{2, 4, 3, 0, 1};
std::vector<uint16_t> ord{0, 2, 1};
auto permute = std::make_shared<cldnn::permute>(permute_id, input_info{concat_id}, ord);
auto& permute_node = p.get_or_create(permute);
p.add_intermediate(permute_node, node, con_node, true);
Expand All @@ -216,7 +205,6 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std:
p.mark_if_constant(node);
node.recalc_output_layout(false);
};
set_implementation_and_output(reorder_node);
set_implementation_and_output(crop1_node);
set_implementation_and_output(crop0_node);
set_implementation_and_output(crop2_node);
Expand All @@ -228,8 +216,9 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std:
void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::shared_ptr<WeightsReorderParams> reorder_params, program& p, \
cldnn::program_node& prev, cldnn::program_node& node) {
OPENVINO_ASSERT(reorder_params != nullptr, "[GPU] WeightsReorderParams is not initialized.");
const auto dir_num = static_cast<int>(reorder_params->get_input_layout().get_shape()[0]);
auto hiddenSize = reorder_params->get_output_layout().get_shape()[1] / 4;
auto cropSize = cldnn::tensor{1, static_cast<int>(hiddenSize), 1, 1};
auto cropSize = cldnn::tensor{dir_num, static_cast<int>(hiddenSize), 1, 1};
std::string crop_id_b = input_id + "_c";
auto get_crop_node = [&](int cropNum) -> cldnn::program_node& {
auto crop_id = primitive_id(crop_id_b + std::to_string(cropNum));
Expand All @@ -242,7 +231,7 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh
auto& crop3_node = get_crop_node(3);
std::vector<input_info> con_input{input_info(crop1_node.id()), input_info(crop0_node.id()), input_info(crop2_node.id()), input_info(crop3_node.id())};
cldnn::primitive_id concat_id{input_id + "concat"};
auto con = std::make_shared<cldnn::concatenation>(concat_id, con_input, 2);
auto con = std::make_shared<cldnn::concatenation>(concat_id, con_input, 1);
auto& con_node = p.get_or_create(con);
p.add_intermediate(con_node, node, prev, true);
p.add_intermediate(crop1_node, con_node, prev, true);
Expand All @@ -252,11 +241,6 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh
p.add_connection(crop0_node, con_node, 0);
p.add_connection(crop2_node, con_node, 0);
p.add_connection(crop3_node, con_node, 0);
std::string permute_id = input_id + "_pex";
std::vector<uint16_t> ord{0, 3, 2, 1};
auto permute = std::make_shared<cldnn::permute>(permute_id, input_info{concat_id}, ord);
auto& permute_node = p.get_or_create(permute);
p.add_intermediate(permute_node, node, con_node, true);
auto set_implementation_and_output = [this, &p](program_node& node) {
node.get_output_layout(false);
select_implementation(p, node);
Expand All @@ -268,7 +252,6 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh
set_implementation_and_output(crop2_node);
set_implementation_and_output(crop3_node);
set_implementation_and_output(con_node);
set_implementation_and_output(permute_node);
}

void post_optimize_weights::run(program& p) {
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/impls/ocl/lstm_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "primitive_base.hpp"

#include "lstm_cell_inst.h"
#include "lstm/lstm_cell_kernel_selector.h"
#include "lstm/lstm_cell_and_seq_kernel_selector.h"
#include "lstm/lstm_kernel_base.h"
#include "openvino/op/lstm_cell.hpp"
#include "lstm_cell.hpp"
Expand All @@ -16,7 +16,7 @@ namespace ocl {
struct lstm_cell_impl : typed_primitive_impl_ocl<lstm_cell> {
using parent = typed_primitive_impl_ocl<lstm_cell>;
using parent::parent;
using kernel_selector_t = kernel_selector::lstm_cell_kernel_selector;
using kernel_selector_t = kernel_selector::lstm_cell_and_seq_kernel_selector;
using kernel_params_t = kernel_selector::lstm_params;

DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::lstm_cell_impl)
Expand Down
5 changes: 3 additions & 2 deletions src/plugins/intel_gpu/src/graph/impls/ocl/rnn_seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "lstm_seq_inst.h"
#include "rnn_seq.hpp"
#include "lstm/lstm_seq_kernel_selector.h"
#include "lstm/lstm_cell_and_seq_kernel_selector.h"
#include "lstm/lstm_kernel_base.h"
#include "openvino/op/lstm_sequence.hpp"
#include "impls/registry/implementation_manager.hpp"
Expand All @@ -17,7 +17,7 @@ namespace ocl {
struct rnn_seq_impl : typed_primitive_impl_ocl<lstm_seq> {
using parent = typed_primitive_impl_ocl<lstm_seq>;
using parent::parent;
using kernel_selector_t = kernel_selector::lstm_seq_kernel_selector;
using kernel_selector_t = kernel_selector::lstm_cell_and_seq_kernel_selector;
using kernel_params_t = kernel_selector::lstm_params;

DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::rnn_seq_impl)
Expand Down Expand Up @@ -47,6 +47,7 @@ struct rnn_seq_impl : typed_primitive_impl_ocl<lstm_seq> {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
const auto& primitive = impl_param.typed_desc<lstm_seq>();
auto params = get_default_params<kernel_selector::lstm_params>(impl_param);
params.sequential = true;
for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) {
params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i)));
}
Expand Down
13 changes: 10 additions & 3 deletions src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ struct lstm_seq_onednn : typed_primitive_onednn_impl<lstm_seq> {
auto weights_shape = impl_params.get_input_layout(layout_nr).get_shape();
auto target_weights_layout = impl_params.get_input_layout(layout_nr);
target_weights_layout.format = cldnn::format::bfzyx;
auto layout = target_weights_layout.clone_with_other_shape(ov::Shape{weights_shape[0], weights_shape[1], 1, weights_shape[2], weights_shape[3]});
auto layout = target_weights_layout.clone_with_other_shape(ov::Shape{weights_shape[0], weights_shape[1], weights_shape[2], 1, 1});
return layout;
}

Expand Down Expand Up @@ -168,11 +168,18 @@ struct lstm_seq_onednn : typed_primitive_onednn_impl<lstm_seq> {
"[GPU] The format kind of the output memory descriptor of onednn lstm_seq cannot be 'any'.");

auto eng = engine.get_onednn_engine();
dnnl::rnn_direction lstm_desc_dir;
if (direction == ov::op::RecurrentSequenceDirection::FORWARD) {
lstm_desc_dir = dnnl::rnn_direction::unidirectional_left2right;
} else if (direction == ov::op::RecurrentSequenceDirection::REVERSE) {
lstm_desc_dir = dnnl::rnn_direction::unidirectional_right2left;
} else {
lstm_desc_dir = dnnl::rnn_direction::bidirectional_concat;
}
return std::make_shared<dnnl::lstm_forward::primitive_desc>(
eng,
dnnl::prop_kind::forward_inference,
direction == ov::op::RecurrentSequenceDirection::FORWARD ? dnnl::rnn_direction::unidirectional_left2right : \
dnnl::rnn_direction::unidirectional_right2left,
lstm_desc_dir,
input_md,
initial_hidden,
initial_cell,
Expand Down
26 changes: 13 additions & 13 deletions src/plugins/intel_gpu/src/graph/lstm_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,24 @@ namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(lstm_cell)

// Computes the single output layout of an lstm_cell primitive.
//
// The output is a 2-D layout built from two input layouts:
//   * dimension 0 of input 0's partial shape (used here as the batch size), and
//   * dimension 1 of input 1's partial shape (used here as the hidden size;
//     input 1 is presumably the initial hidden state - confirm against the
//     primitive definition).
// Data type and format are copied from input 0.
//
// `node` is not read by this function; only `impl_param` is consulted.
layout lstm_cell_inst::calc_output_layout(lstm_cell_node const& node, kernel_impl_params const& impl_param) {
    // Each local is declared exactly once. The previous text declared every one
    // of these names twice in the same scope (a by-value `auto` followed by a
    // `const auto&`), which is a redeclaration error.
    const auto& input_layout = impl_param.get_input_layout(0);
    const auto& input_pshape = input_layout.get_partial_shape();
    const auto& input_layout_hidden = impl_param.get_input_layout(1);
    const auto& input_pshape_hidden = input_layout_hidden.get_partial_shape();
    const auto& lstm_batch_size = input_pshape[0];
    const auto& lstm_hidden_size = input_pshape_hidden[1];

    return cldnn::layout{ov::PartialShape{lstm_batch_size, lstm_hidden_size}, input_layout.data_type, input_layout.format};
}

template<typename ShapeType>
std::vector<layout> lstm_cell_inst::calc_output_layouts(lstm_cell_node const& node, kernel_impl_params const& impl_param) {
auto input_layout = impl_param.get_input_layout(0);
auto input_pshape = input_layout.get_partial_shape();
auto input_layout_hidden = impl_param.get_input_layout(1);
auto input_pshape_hidden = input_layout_hidden.get_partial_shape();
auto lstm_batch_size = input_pshape[0];
auto lstm_hidden_size = input_pshape_hidden[1];
const auto& input_layout = impl_param.get_input_layout(0);
const auto& input_pshape = input_layout.get_partial_shape();
const auto& input_layout_hidden = impl_param.get_input_layout(1);
const auto& input_pshape_hidden = input_layout_hidden.get_partial_shape();
const auto& lstm_batch_size = input_pshape[0];
const auto& lstm_hidden_size = input_pshape_hidden[1];

auto out_layout = cldnn::layout{ShapeType{lstm_batch_size, lstm_hidden_size}, input_layout.data_type, input_layout.format};
return {out_layout, out_layout};
Expand All @@ -36,7 +36,7 @@ std::vector<layout> lstm_cell_inst::calc_output_layouts(lstm_cell_node const& no
template std::vector<layout> lstm_cell_inst::calc_output_layouts<ov::PartialShape>(lstm_cell_node const& node, const kernel_impl_params& impl_param);

std::string lstm_cell_inst::to_string(lstm_cell_node const& node) {
auto desc = node.get_primitive();
const auto& desc = node.get_primitive();
auto node_info = node.desc_to_json();

std::stringstream primitive_description;
Expand Down
36 changes: 17 additions & 19 deletions src/plugins/intel_gpu/src/graph/lstm_seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(lstm_seq)

layout lstm_seq_inst::calc_output_layout(lstm_seq_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<lstm_seq>();
auto input_layout = impl_param.get_input_layout(0);
auto input_pshape = input_layout.get_partial_shape();
auto input_layout_hidden = impl_param.get_input_layout(1);
auto input_pshape_hidden = input_layout_hidden.get_partial_shape();
auto lstm_batch_size = input_pshape[0];
auto lstm_seq_length = input_pshape[1];
auto lstm_hidden_size = input_pshape_hidden[2];
const auto& desc = impl_param.typed_desc<lstm_seq>();
const auto& input_layout = impl_param.get_input_layout(0);
const auto& input_pshape = input_layout.get_partial_shape();
const auto& input_layout_hidden = impl_param.get_input_layout(1);
const auto& input_pshape_hidden = input_layout_hidden.get_partial_shape();
const auto& lstm_batch_size = input_pshape[0];
const auto& lstm_seq_length = input_pshape[1];
const auto& lstm_hidden_size = input_pshape_hidden[2];

auto first_out_fmt = cldnn::format::bfyx;
if (node.get_preferred_impl_type() == impl_types::onednn && node.get_preferred_output_fmt() != format::any) {
Expand All @@ -29,22 +29,20 @@ layout lstm_seq_inst::calc_output_layout(lstm_seq_node const& node, kernel_impl_

template<typename ShapeType>
std::vector<layout> lstm_seq_inst::calc_output_layouts(lstm_seq_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<lstm_seq>();
auto input_layout = impl_param.get_input_layout(0);
auto input_pshape = input_layout.get_partial_shape();
auto input_layout_hidden = impl_param.get_input_layout(1);
auto input_pshape_hidden = input_layout_hidden.get_partial_shape();
auto lstm_batch_size = input_pshape[0];
auto lstm_seq_length = input_pshape[1];
auto lstm_hidden_size = input_pshape_hidden[2];
const auto& desc = impl_param.typed_desc<lstm_seq>();
const auto& input_layout = impl_param.get_input_layout(0);
const auto& input_pshape = input_layout.get_partial_shape();
const auto& input_layout_hidden = impl_param.get_input_layout(1);
const auto& input_pshape_hidden = input_layout_hidden.get_partial_shape();
const auto& lstm_batch_size = input_pshape[0];
const auto& lstm_seq_length = input_pshape[1];
const auto& lstm_hidden_size = input_pshape_hidden[2];

auto first_out_fmt = cldnn::format::bfyx;
auto second_out_fmt = input_layout.format;
auto third_out_fmt = input_layout.format;
if (node.get_preferred_impl_type() == impl_types::onednn && node.get_preferred_output_fmt() != format::any) {
first_out_fmt = node.get_preferred_output_fmt();
second_out_fmt = node.get_preferred_output_fmt(1);
third_out_fmt = node.get_preferred_output_fmt(2);
}
auto num_directions = desc->num_directions();

Expand All @@ -56,7 +54,7 @@ std::vector<layout> lstm_seq_inst::calc_output_layouts(lstm_seq_node const& node
template std::vector<layout> lstm_seq_inst::calc_output_layouts<ov::PartialShape>(lstm_seq_node const& node, const kernel_impl_params& impl_param);

std::string lstm_seq_inst::to_string(lstm_seq_node const& node) {
auto desc = node.get_primitive();
const auto& desc = node.get_primitive();
auto node_info = node.desc_to_json();

std::stringstream primitive_description;
Expand Down
Loading

0 comments on commit e1fcc01

Please sign in to comment.