diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp
index a601b2c74c09e3..17498831a542d1 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp
@@ -50,7 +50,7 @@ struct FullyConnectedImplementationManager : public ImplementationManager {
         bool compressed_case = fc_prim->compressed_weights &&
                                one_of(in0_dt, {data_types::f16, data_types::f32, data_types::i8}) &&
                                one_of(wei_dt, {data_types::u8, data_types::i8, data_types::u4, data_types::i4}) &&
-                               one_of(out_dt, {data_types::f16, data_types::f32});
+                               one_of(out_dt, {data_types::f16, data_types::f32, data_types::u8, data_types::i8});
 
         if (!f16f16_case && !f32f32_case && !u8s8_case && !compressed_case)
             return false;
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/permute_f_y_axes.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/permute_f_y_axes.cl
index 6df614c3328dd9..3aafc2c727b345 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/permute_f_y_axes.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/permute_f_y_axes.cl
@@ -43,7 +43,7 @@ KERNEL (permute_f_y_axes)(
     result = FUSED_OPS_RESULT_VEC;
 #else
     IN_VEC_TYPE res = READ_VEC(0, &input[INPUT0_GET_INDEX(b_idx, f_idx, y_idx, x_idx)]);
-    OUT_VEC_TYPE result = ACTIVATION(res, ACTIVATION_PARAMS);
+    OUT_VEC_TYPE result = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
 #endif
     const int output_idx = OUTPUT_GET_INDEX(b_idx, f_out_idx, y_out_idx, x_idx);
     WRITE_VEC(result, 0, &output[output_idx]);
diff --git a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp
index 7c0c570f7cf54c..7b0aa921ef3ad5 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp
@@ -68,6 +68,34 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share
     }
 
     p.add_primitive(*op, fc);
+
+    if (op->get_input_partial_shape(0).size() > 3 && !p.use_new_shape_infer()) {
+        auto lastLayerName = primitive_name;
+        auto outReshapeName = primitive_name + "_cldnn_out_reshape";
+
+        // add reorder
+        auto outDims = op->get_output_shape(0);
+        auto outTensor = tensor_from_dims(outDims);
+
+        if (outDims.size() > 4) {
+            cldnn::format outputFormat = cldnn::format::bfyx;
+            switch (outDims.size()) {
+            case 5: outputFormat = cldnn::format::bfzyx; break;
+            case 6: outputFormat = cldnn::format::bfwzyx; break;
+            default: break;
+            }
+
+            cldnn::primitive_id reorderId = "reorder:" + outReshapeName + "_reorder";
+            cldnn::layout outputLayout(cldnn::element_type_to_data_type(op->get_output_element_type(0)), outputFormat, outTensor);
+            auto reorder_prim = cldnn::reorder(reorderId, cldnn::input_info(primitive_name), outputLayout);
+            p.add_primitive(*op, reorder_prim);
+            lastLayerName = reorderId;
+        }
+
+        // add reshape
+        auto outReshapePrim = cldnn::reshape(outReshapeName, cldnn::input_info(lastLayerName), outTensor);
+        p.add_primitive(*op, outReshapePrim);
+    }
 }
 
 static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr<op::FullyConnected>& op) {
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index db93696865a971..5d5f901deeaf1f 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -15,8 +15,11 @@
 #include "intel_gpu/plugin/transformations_pipeline.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 #include "intel_gpu/runtime/itt.hpp"
+#include "low_precision/add.hpp"
 #include "low_precision/convolution.hpp"
 #include "low_precision/convolution_backprop_data.hpp"
+#include "low_precision/fold_convert.hpp"
+#include "low_precision/fuse_convert.hpp"
 #include "low_precision/group_convolution.hpp"
 #include "low_precision/low_precision.hpp"
 #include "low_precision/mat_mul.hpp"
@@ -25,7 +28,9 @@
 #include "low_precision/pull_reshape_through_dequantization.hpp"
 #include "low_precision/pull_transpose_through_dequantization.hpp"
 #include "low_precision/recurrent_cell.hpp"
+#include "low_precision/rt_info/bias_attribute.hpp"
 #include "low_precision/strided_slice.hpp"
+#include "low_precision/transpose.hpp"
 #include "openvino/core/deprecated.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/core/validation_util.hpp"
@@ -46,6 +51,7 @@
 #include "openvino/op/reshape.hpp"
 #include "openvino/op/rnn_cell.hpp"
 #include "openvino/op/rnn_sequence.hpp"
+#include "openvino/op/scaled_dot_product_attention.hpp"
 #include "openvino/op/squeeze.hpp"
 #include "openvino/op/unsqueeze.hpp"
 #include "openvino/op/util/sub_graph_base.hpp"
@@ -169,7 +175,17 @@ static bool disable_reduce_decomposition(const std::shared_ptr<const ov::Node> n
     return false;
 }
 
-static bool is_decompression_multiply(const std::shared_ptr<const ov::Node> node) {
+static bool is_decompression_multiply(const std::shared_ptr<const ov::Node> node, bool supports_immad) {
+    std::vector<ov::DiscreteTypeInfo> target_consumers = { ov::opset1::MatMul::get_type_info_static(),
+                                                           ov::op::v8::Gather::get_type_info_static(),
+                                                           ov::op::v1::Convolution::get_type_info_static(),
+                                                           ov::opset1::Convolution::get_type_info_static(),
+                                                           ov::opset1::GroupConvolution::get_type_info_static() };
+
+    std::vector<ov::DiscreteTypeInfo> convolutions = { ov::op::v1::Convolution::get_type_info_static(),
+                                                       ov::opset1::Convolution::get_type_info_static(),
+                                                       ov::opset1::GroupConvolution::get_type_info_static() };
+
     auto all_has_types = [](const std::set<ov::Input<ov::Node>>& consumers, const std::vector<ov::DiscreteTypeInfo>& types) {
         return std::all_of(consumers.begin(), consumers.end(), [&types](const ov::Input<ov::Node>& input) {
             return cldnn::one_of(input.get_node()->get_type_info(), types);
@@ -177,27 +193,47 @@
     };
 
     const auto consumers = node->get_output_target_inputs(0);
-    if (all_has_types(consumers, { ov::op::v0::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() }))
-        return true;
-    auto are_multiply_from_decompression = [&all_has_types](const ov::Input<ov::Node> consumer) {
+    for (const auto& consumer : consumers) {
+        const auto& type_info = consumer.get_node()->get_type_info();
+        if (cldnn::one_of(type_info, target_consumers)) {
+            if (cldnn::one_of(type_info, convolutions) && consumer.get_node()->input_value(0).get_partial_shape().is_dynamic()) {
+                return false;
+            }
+            return true;
+        }
+    }
+
+    auto are_multiply_from_decompression = [&](const ov::Input<ov::Node> consumer) {
         if (!cldnn::one_of(consumer.get_node()->get_type_info(), { ov::op::v1::Multiply::get_type_info_static() }))
             return false;
         const auto child_consumers = consumer.get_node()->get_output_target_inputs(0);
-        if (all_has_types(child_consumers, { ov::opset1::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() }))
-            return true;
+
+        for (const auto& child_consumer : child_consumers) {
+            const auto& type_info = child_consumer.get_node()->get_type_info();
+            if (cldnn::one_of(type_info, target_consumers)) {
+                if (cldnn::one_of(type_info, convolutions) && child_consumer.get_node()->input_value(0).get_partial_shape().is_dynamic()) {
+                    return false;
+                }
+                return true;
+            }
+        }
         return false;
     };
 
-    auto are_converts_from_decompression = [&all_has_types, &are_multiply_from_decompression](const std::set<ov::Input<ov::Node>>& consumers) {
+    auto are_converts_from_decompression = [&](const std::set<ov::Input<ov::Node>>& consumers) {
         if (!all_has_types(consumers, { ov::opset1::Convert::get_type_info_static() }))
             return false;
         for (const auto& consumer : consumers) {
             const auto child_consumers = consumer.get_node()->get_output_target_inputs(0);
             for (const auto& child_consumer : child_consumers) {
                 const auto& type_info = child_consumer.get_node()->get_type_info();
-                if (cldnn::one_of(type_info, { ov::opset1::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() }))
-                    continue;
+                if (cldnn::one_of(type_info, target_consumers)) {
+                    if (cldnn::one_of(type_info, convolutions) && child_consumer.get_node()->input_value(0).get_partial_shape().is_dynamic()) {
+                        return false;
+                    }
+                    return true;
+                }
                 if (are_multiply_from_decompression(child_consumer)) {
                     continue;
                 }
@@ -210,9 +246,16 @@ static bool is_decompression_multiply(const std::shared_ptr<const ov::Node> node
     if (all_has_types(consumers, { ov::opset1::Reshape::get_type_info_static() })) {
         for (const auto& consumer : consumers) {
             const auto child_consumers = consumer.get_node()->get_output_target_inputs(0);
-            if (all_has_types(child_consumers, { ov::opset1::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() }) ||
-                are_converts_from_decompression(child_consumers)) {
-                return true;
+            for (const auto& child_consumer : child_consumers) {
+                const auto& type_info = child_consumer.get_node()->get_type_info();
+                if (cldnn::one_of(type_info, target_consumers)) {
+                    if (cldnn::one_of(type_info, convolutions) && child_consumer.get_node()->input_value(0).get_partial_shape().is_dynamic()) {
+                        return false;
+                    }
+                    return true;
+                } else if (are_converts_from_decompression(child_consumers)) {
+                    return true;
+                }
             }
         }
     }
@@ -330,13 +373,9 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression
         // types are not supported by oneDNN)
         manager.register_pass<ov::pass::MarkDequantizationSubgraph>(supported_woq_types, !device_info.supports_immad);
-
-        // Need to check if transformations work correctly for mixed models with both compression and quantization at the same time.
-        if (!is_model_quantized) {
-            pass_config->set_callback<ov::pass::MarkDequantizationSubgraph>([&](const std::shared_ptr<const ov::Node> node) {
-                return !is_decompression_multiply(node);
-            });
-        }
+        pass_config->set_callback<ov::pass::MarkDequantizationSubgraph>([&](const std::shared_ptr<const ov::Node> node) {
+            return !is_decompression_multiply(node, device_info.supports_immad);
+        });
 
         const bool keep_precision_sensitive_in_fp32_1 = true;
         const bool convert_input_output_precision = false;
@@ -705,12 +744,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         auto lptPassConfig = lptManager.get_pass_config();
         // quantized LSTMSequence / GPUSequence are not supported yet. Avoid extra transformation
         lptPassConfig->disable<RecurrentCellTransformation>();
-        lptPassConfig->set_callback([](const_node_ptr& node) -> bool {
-            if (const auto mulitply = std::dynamic_pointer_cast<const ov::opset1::Multiply>(node)) {
-                return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply);
-            }
-            return false;
-        });
         lptPassConfig->set_callback<ConvolutionBackpropDataTransformation>([func, defaultPrecisions](const_node_ptr& node) -> bool {
             auto fillStaticChannel = [func](const ov::PartialShape& shape, size_t& channel) -> bool {
                 const auto rank = shape.rank();
@@ -747,6 +780,40 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
                 || WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
         });
 
+        lptPassConfig->set_callback<AddTransformation>([&](const_node_ptr& node) -> bool {
+            for (auto& user : node->get_users()) {
+                if (ov::is_type<ov::op::v13::ScaledDotProductAttention>(user))
+                    return true;
+            }
+
+            return false;
+        });
+
+        lptPassConfig->set_callback([](const_node_ptr& node) -> bool {
+            return ov::is_type<ov::opset1::Multiply>(node) && !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(node);
+        });
+
+        lptPassConfig->set_callback<FoldConvertTransformation>([&](const_node_ptr& node) -> bool {
+            const auto& consumers = node->get_output_target_inputs(0);
+            if (consumers.size() == 1) {
+                const auto consumer = consumers.begin()->get_node()->shared_from_this();
+                return ov::is_type<ov::opset1::Multiply>(consumer) && is_decompression_multiply(consumer, device_info.supports_immad);
+            }
+            return false;
+        });
+        lptPassConfig->set_callback<FuseConvertTransformation>([&](const_node_ptr& node) -> bool {
+            if (ov::is_type<ov::opset1::Multiply>(node)) {
+                return ov::is_type<ov::opset1::Multiply>(node) && is_decompression_multiply(node, device_info.supports_immad);
+            } else if (ov::is_type<ov::opset1::Subtract>(node)) {
+                const auto& consumers = node->get_output_target_inputs(0);
+                if (consumers.size() == 1) {
+                    const auto consumer = consumers.begin()->get_node()->shared_from_this();
+                    return ov::is_type<ov::opset1::Multiply>(consumer) && is_decompression_multiply(consumer, device_info.supports_immad);
+                }
+            }
+            return false;
+        });
+
         lptPassConfig->set_callback<MultiplyToGroupConvolutionTransformation>([&](const_node_ptr& node) -> bool {
             // disable MultiplyToGroupConvolution if Multiply with Constant can be fused
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/hybrid.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/hybrid.cpp
new file mode 100644
index 00000000000000..f7de00d77a9bd9
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/hybrid.cpp
@@ -0,0 +1,379 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "transformations/rt_info/decompression.hpp"
+
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/result.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/op/transpose.hpp"
+
+namespace {
+using ov::test::InputShape;
+
+struct ShapeParams {
+    ShapeParams() = default;
+    ShapeParams(InputShape data_shape, ov::Shape weights_shape, int weights_group_size = -1)
+        : data_shape(std::move(data_shape)),
+          weights_shape(std::move(weights_shape)),
+          weights_group_size(weights_group_size) {}
+
+    InputShape data_shape;
+    ov::Shape weights_shape;
+    // Decompression group size. If the value is equal to -1, ordinary decompression is used
+    int weights_group_size;
+};
+
+const std::vector<ov::element::Type> activations_precisions = {ov::element::f32, ov::element::f16};
+const std::vector<ov::element::Type> weights_precisions = {ov::element::u8, ov::element::u4, ov::element::i4};
+const std::vector<bool> transpose_weights = {true, false};
+
+const std::vector<ShapeParams> input_shapes_basic = {
+    {{{-1, -1, -1}, {{1, 8, 4}, {1, 8, 4}}}, {4, 4}},
+};
+
+const std::vector<bool> add_decompression_sub = {true, false};
+const std::vector<bool> reshape_on_decompression = {true, false};
+const std::vector<bool> per_tensor_zp = {true, false};
+
+using MatmulWeightsDecompressionQuantizeConvolutionParams = std::tuple<ShapeParams,
+                                                                       ov::element::Type,  // weights precision
+                                                                       ov::element::Type,  // activations precision
+                                                                       bool,               // transpose weights
+                                                                       bool,               // decompression subtract
+                                                                       bool,               // reshape on decompression constants
+                                                                       bool,               // extra multiply
+                                                                       bool,               // per-tensor zero-point
+                                                                       uint64_t>;          // dynamic quantization group size
+
+class MatmulWeightsDecompressionQuantizeConvolution : public testing::WithParamInterface<MatmulWeightsDecompressionQuantizeConvolutionParams>,
+                                                      virtual public ov::test::SubgraphBaseTest {
+public:
+    static std::string get_test_case_name(testing::TestParamInfo<MatmulWeightsDecompressionQuantizeConvolutionParams> obj) {
+        ShapeParams shape_params;
+        ov::element::Type weights_precision;
+        ov::element::Type activations_precision;
+        bool transpose;
+        bool decompression_sub;
+        bool reshape_on_decompression;
+        bool extra_multiply;
+        bool per_tensor_zp;
+        uint64_t dyn_quan_group_size;
+
+        std::tie(shape_params,
+                 weights_precision,
+                 activations_precision,
+                 transpose,
+                 decompression_sub,
+                 reshape_on_decompression,
+                 extra_multiply,
+                 per_tensor_zp,
+                 dyn_quan_group_size) = obj.param;
+
+        std::ostringstream result;
+        result << "data_shape=";
+        result << ov::test::utils::partialShape2str({shape_params.data_shape.first}) << "_";
+        for (const auto& actual_shape : shape_params.data_shape.second) {
+            result << ov::test::utils::partialShape2str({actual_shape}) << "_";
+        }
+        result << "_" << "weights_shape=" << shape_params.weights_shape << "_";
+        result << "group_size=" << (shape_params.weights_group_size == -1 ? 1111 : shape_params.weights_group_size) << "_";
+        result << "weights_precision=" << weights_precision << "_";
+        result << "activations_precision=" << activations_precision << "_";
+        result << "transpose_weights=" << transpose << "_";
+        result << "decompression_subtract=" << decompression_sub << "_";
+        result << "reshape_on_decompression=" << reshape_on_decompression << "_";
+        result << "extra_multiply=" << extra_multiply << "_";
+        result << "per_tensor_zp=" << per_tensor_zp << "_";
+        result << "dyn_quan_group_size=" << dyn_quan_group_size << "_";
+        result << "quantize_conv";
+
+        return result.str();
+    }
+
+protected:
+    std::shared_ptr<ov::Model> init_subgraph(const ov::PartialShape& data_shape,
+                                             const ov::Shape& weights_shape,
+                                             const int group_size,
+                                             const ov::element::Type data_precision,
+                                             const ov::element::Type weights_precision,
+                                             const bool transpose_weights,
+                                             const bool add_subtract,
+                                             const bool reshape_on_decompression,
+                                             const bool extra_multiply,
+                                             const bool per_tensor_zp) {
+        ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(data_precision, data_shape)};
+        const auto weights_subgraph = init_compressed_weights_subgraph(weights_shape,
+                                                                       group_size,
+                                                                       data_precision,
+                                                                       weights_precision,
+                                                                       transpose_weights,
+                                                                       add_subtract,
+                                                                       reshape_on_decompression,
+                                                                       extra_multiply,
+                                                                       per_tensor_zp);
+
+        auto mat_mul = std::make_shared<ov::op::v0::MatMul>(params[0], weights_subgraph);
+
+        std::vector<int64_t> shape_pattern = {1, -1, 2, 4};
+        auto shapePatternsNode = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape({4}), shape_pattern);
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(mat_mul, shapePatternsNode, false);
+        reshape->set_friendly_name("reshape");
+
+        auto conv = init_quantized_convolution_subgraph(reshape);
+        return std::make_shared<ov::Model>(ov::NodeVector{conv}, params, "MatmulWeightsDecompressionQuantizeConvolution");
+    }
+
+    std::shared_ptr<ov::Node> init_compressed_weights_subgraph(const ov::Shape& weights_shape,
+                                                               const int group_size,
+                                                               const ov::element::Type data_precision,
+                                                               const ov::element::Type weights_precision,
+                                                               const bool transpose_weights,
+                                                               const bool add_subtract,
+                                                               const bool reshape_on_decompression_constant,
+                                                               const bool extra_multiply,
+                                                               const bool per_tensor_zp) {
+        auto transpose_if_necessary = [&](const ov::Shape& shape) {
+            auto result_shape = shape;
+            if (transpose_weights)
+                std::swap(*result_shape.rbegin(), *(result_shape.rbegin() + 1));
+            return result_shape;
+        };
+
+        const bool group_decompression = group_size != -1;
+        // Weights has shape [I, O], where
+        // I - input channels
+        // O - output channels
+        // In case of group decompression, input channels dimension is split into 2: I -> [N, G], where
+        // N - number of groups
+        // G - group size
+        auto transformed_weights_shape = transpose_if_necessary(weights_shape);
+        if (group_decompression) {
+            OPENVINO_ASSERT(weights_shape[0] % group_size == 0,
+                            "Weights output channels count (",
+                            weights_shape[0],
+                            ") must be divisible by decompression group size (",
+                            group_size,
+                            ").");
+            auto in_channel_idx = transpose_weights ? transformed_weights_shape.size() - 1 : transformed_weights_shape.size() - 2;
+            transformed_weights_shape[in_channel_idx] = weights_shape[0] / group_size;
+            transformed_weights_shape.insert(transformed_weights_shape.begin() + in_channel_idx + 1, group_size);
+        }
+        auto weights_tensor = ov::test::utils::create_and_fill_tensor(weights_precision, transformed_weights_shape);
+        auto weights = std::make_shared<ov::op::v0::Constant>(weights_tensor);
+        weights->set_friendly_name("Compressed_weights");
+        auto weights_convert = std::make_shared<ov::op::v0::Convert>(weights, data_precision);
+
+        std::shared_ptr<ov::Node> mul_parent = weights_convert;
+        auto output_channels = *weights_shape.rbegin();
+
+        // Decompression constants shape:
+        // Ordinary decompression: [O, 1]
+        // Group decompression: [O, N, 1]
+        ov::Shape scaleshift_target_shape{output_channels};
+        scaleshift_target_shape.insert(scaleshift_target_shape.begin(), group_decompression ? weights_shape[0] / group_size : 1);
+        scaleshift_target_shape = transpose_if_necessary(scaleshift_target_shape);
+        if (group_decompression) {
+            auto in_channel_idx = transpose_weights ? scaleshift_target_shape.size() - 1 : scaleshift_target_shape.size() - 2;
+            scaleshift_target_shape.insert(scaleshift_target_shape.begin() + in_channel_idx + 1, 1);
+        }
+
+        auto scaleshift_const_shape = scaleshift_target_shape;
+        if (reshape_on_decompression_constant)
+            scaleshift_const_shape.erase(std::remove(scaleshift_const_shape.begin(), scaleshift_const_shape.end(), 1), scaleshift_const_shape.end());
+        if (add_subtract) {
+            auto shift_tensor_shape = per_tensor_zp ? ov::Shape{1} : scaleshift_const_shape;
+            auto shift_tensor = ov::test::utils::create_and_fill_tensor(weights_precision, shift_tensor_shape);
+            if (per_tensor_zp && weights_precision.bitwidth() == 4) {
+                static_cast<uint8_t*>(shift_tensor.data())[0] = 0x88;
+            }
+            auto shift_const = std::make_shared<ov::op::v0::Constant>(shift_tensor);
+            std::shared_ptr<ov::Node> shift_convert = std::make_shared<ov::op::v0::Convert>(shift_const, data_precision);
+            if (reshape_on_decompression_constant && !per_tensor_zp) {
+                auto shift_reshape_const = ov::op::v0::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape);
+                auto shift_reshape = std::make_shared<ov::op::v1::Reshape>(shift_convert, shift_reshape_const, false);
+                shift_convert = shift_reshape;
+            }
+            mul_parent = std::make_shared<ov::op::v1::Subtract>(weights_convert, shift_convert);
+        }
+
+        ov::test::utils::InputGenerateData in_data;
+        in_data.start_from = -0.5;
+        in_data.range = 1;
+        in_data.resolution = 30000;
+        auto scale_tensor = ov::test::utils::create_and_fill_tensor(data_precision, scaleshift_const_shape, in_data);
+        for (size_t i = 0; i < scale_tensor.get_size(); i++) {
+            if (data_precision == ov::element::f16)
+                scale_tensor.data<ov::float16>()[i] /= ov::float16(16.f);
+            else if (data_precision == ov::element::f32)
+                scale_tensor.data<float>()[i] /= 16.f;
+        }
+        std::shared_ptr<ov::Node> scale_const = std::make_shared<ov::op::v0::Constant>(scale_tensor);
+        if (reshape_on_decompression_constant) {
+            auto scale_reshape_const = ov::op::v0::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape);
+            auto scale_reshape = std::make_shared<ov::op::v1::Reshape>(scale_const, scale_reshape_const, false);
+            scale_const = scale_reshape;
+        }
+        std::shared_ptr<ov::Node> last_node = std::make_shared<ov::op::v1::Multiply>(mul_parent, scale_const);
+
+        if (group_decompression) {
+            auto reshape_target_shape = transpose_weights ? std::vector<int>{-1, static_cast<int>(weights_shape[0])}
+                                                          : std::vector<int>{static_cast<int>(weights_shape[0]), -1};
+            auto target_shape_node = ov::op::v0::Constant::create(ov::element::i32, {reshape_target_shape.size()}, reshape_target_shape);
+            last_node = std::make_shared<ov::op::v1::Reshape>(last_node, target_shape_node, false);
+        }
+        if (transpose_weights) {
+            const size_t rank = last_node->get_output_partial_shape(0).size();
+            std::vector<int> order(rank);
+            std::iota(order.begin(), order.end(), 0);
+            std::swap(*order.rbegin(), *(order.rbegin() + 1));
+            auto transpose_constant = ov::op::v0::Constant::create(ov::element::i32, {rank}, order);
+            last_node = std::make_shared<ov::op::v1::Transpose>(last_node, transpose_constant);
+        } else if (extra_multiply) {
+            last_node = std::make_shared<ov::op::v1::Multiply>(last_node, scale_const);
+        }
+        return last_node;
+    }
+
+    std::shared_ptr<ov::Node> init_quantized_convolution_subgraph(std::shared_ptr<ov::Node> data) {
+        size_t input_channels = 4;
+        size_t output_channels = 4;
+        ov::Shape input_intervals_shape;
+        ov::Shape weights_intervals_shape;
+        ov::Shape weights_shape1{output_channels, input_channels, 1, 1};
+
+        auto low_act = ov::op::v0::Constant::create(ov::element::f32, input_intervals_shape, {0});
+        auto high_act = ov::op::v0::Constant::create(ov::element::f32, input_intervals_shape, {20});
+        auto low_weights = ov::op::v0::Constant::create(ov::element::f32, weights_intervals_shape, {-0.72519057});
+        auto high_weights = ov::op::v0::Constant::create(ov::element::f32, weights_intervals_shape, {0.72519057});
+        std::shared_ptr<ov::Node> activations = nullptr;
+        auto weights_tensor = ov::test::utils::create_and_fill_tensor_real_distribution(ov::element::f32, weights_shape1, -0.5f, 0.5f, 1);
+        std::shared_ptr<ov::Node> weights = std::make_shared<ov::op::v0::Constant>(weights_tensor);
+
+        auto output_low_act = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {0});
+        auto output_high_act = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {255});
+        activations = std::make_shared<ov::op::v0::FakeQuantize>(data, low_act, high_act, output_low_act, output_high_act, 256);
+        activations = std::make_shared<ov::op::v0::Convert>(activations, ov::element::u8);
+        activations = std::make_shared<ov::op::v0::Convert>(activations, ov::element::f32);
+        std::shared_ptr<ov::Node> shift_act = ov::op::v0::Constant::create(ov::element::u8, input_intervals_shape, {0});
+        shift_act = std::make_shared<ov::op::v0::Convert>(shift_act, ov::element::f32);
+        activations = std::make_shared<ov::op::v1::Subtract>(activations, shift_act);
+        auto scale_act = ov::op::v0::Constant::create(ov::element::f32, input_intervals_shape, {20.0 / 255.0});
+        activations = std::make_shared<ov::op::v1::Multiply>(activations, scale_act);
+
+        auto output_low_weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {0});
+        auto output_high_weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {254});
+        weights = std::make_shared<ov::op::v0::FakeQuantize>(weights, low_weights, high_weights, output_low_weights, output_high_weights, 255);
+        weights = std::make_shared<ov::op::v0::Convert>(weights, ov::element::i8);
+        weights = std::make_shared<ov::op::v0::Convert>(weights, ov::element::f32);
+        std::shared_ptr<ov::Node> shift_weights = ov::op::v0::Constant::create(ov::element::i8, weights_intervals_shape, {0});
+        shift_weights = std::make_shared<ov::op::v0::Convert>(shift_weights, ov::element::f32);
+        weights = std::make_shared<ov::op::v1::Subtract>(weights, shift_weights);
+        auto scale_weights = ov::op::v0::Constant::create(ov::element::f32, weights_intervals_shape, {2.0 / 255.0});
+        weights = std::make_shared<ov::op::v1::Multiply>(weights, scale_weights);
+
+        std::shared_ptr<ov::Node> conv;
+        conv = std::make_shared<ov::op::v1::Convolution>(activations, weights, ov::Strides{1, 1},
+                                                         ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0}, ov::Strides{1, 1});
+        return conv;
+    }
+
+    void SetUp() override {
+        targetDevice = ov::test::utils::DEVICE_GPU;
+
+        ShapeParams shape_params;
+        ov::element::Type weights_precision;
+        ov::element::Type activations_precision;
+        bool transpose_weights;
+        bool decompression_sub;
+        bool reshape_on_decompression;
+        bool extra_multiply;
+        bool per_tensor_zp;
+        uint64_t dyn_quan_group_size;
+
+        std::tie(shape_params,
+                 weights_precision,
+                 activations_precision,
+                 transpose_weights,
+                 decompression_sub,
+                 reshape_on_decompression,
+                 extra_multiply,
+                 per_tensor_zp,
+                 dyn_quan_group_size) = GetParam();
+
+        init_input_shapes({shape_params.data_shape, {{}, {{shape_params.weights_shape}}}});
+
+        inType = outType = activations_precision;
+
+        function = init_subgraph(inputDynamicShapes[0],
+                                 shape_params.weights_shape,
+                                 shape_params.weights_group_size,
+                                 activations_precision,
+                                 weights_precision,
+                                 transpose_weights,
+                                 decompression_sub,
+                                 reshape_on_decompression,
+                                 extra_multiply,
+                                 per_tensor_zp);
+
+
+        if (activations_precision == ov::element::f16) {
+            abs_threshold = 1.0f;
+        } else {
+            abs_threshold = 1e-4f;
+        }
+
+        this->configuration.insert({ov::hint::dynamic_quantization_group_size(dyn_quan_group_size)});
+    }
+
+    void generate_inputs(const std::vector<ov::Shape>& target_input_static_shapes) override {
+        inputs.clear();
+        const auto& model_inputs = function->inputs();
+        for (size_t i = 0; i < model_inputs.size(); ++i) {
+            const auto& model_input = model_inputs[i];
+            ov::test::utils::InputGenerateData in_data;
+            in_data.start_from = -1;
+            in_data.range = 2;
+            in_data.resolution = 10000;
+            ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(model_input.get_element_type(), target_input_static_shapes[i], in_data);
+            inputs.insert({model_input.get_node_shared_ptr(), tensor});
+        }
+    }
+
+    void check_results() {
+        const auto& test_param = GetParam();
+        ov::element::Type weights_precision = std::get<1>(test_param);
+        for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) {
+            if (n->get_friendly_name() == "Compressed_weights") {
+                ASSERT_EQ(n->get_output_element_type(0), weights_precision);
+            }
+        }
+    }
+};
+
+TEST_P(MatmulWeightsDecompressionQuantizeConvolution, Inference) {
+    run();
+    check_results();
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_MatmulWeightsDecompressionQuantizeConvolution_basic,
+                         MatmulWeightsDecompressionQuantizeConvolution,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_basic),
+                                            ::testing::ValuesIn(weights_precisions),
+                                            ::testing::ValuesIn(activations_precisions),
+                                            ::testing::ValuesIn(transpose_weights),
+                                            ::testing::Values(true),
+                                            ::testing::Values(true),
+                                            ::testing::Values(false),
+                                            ::testing::Values(false),
+                                            ::testing::Values(0)),
+                         MatmulWeightsDecompressionQuantizeConvolution::get_test_case_name);
+
+}  // namespace