forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
62 changed files
with
3,318 additions
and
239 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
src/plugins/intel_gpu/include/intel_gpu/op/dynamic_quantize.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Copyright (C) 2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/op/op.hpp" | ||
#include "ov_ops/dynamic_quantize.hpp" | ||
|
||
namespace ov { | ||
namespace intel_gpu { | ||
namespace op { | ||
|
||
class DynamicQuantize : public ov::op::internal::DynamicQuantize { | ||
public: | ||
OPENVINO_OP("DynamicQuantize", "gpu_opset"); | ||
|
||
using QuantizationConfig = ov::op::internal::QuantizationConfig; | ||
|
||
DynamicQuantize() = default; | ||
/// \brief Constructs an DynamicQuantize operation. | ||
/// | ||
/// \param data Input tensor with data | ||
/// \param config Dynamic quantization configuration | ||
/// \param scales_zp_output_order Non default order of scales | ||
/// \param combine_scales_and_zp Save scales and zero points into single buffer by pairs (scale, zp) | ||
DynamicQuantize(const Output<Node>& data, | ||
const QuantizationConfig& config, | ||
const std::vector<uint64_t>& scales_zp_output_order = {}, | ||
const bool combine_scales_and_zp = false); | ||
|
||
void validate_and_infer_types() override; | ||
|
||
std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override; | ||
|
||
const std::vector<uint64_t>& get_scales_zp_output_order() const { | ||
return m_scales_zp_output_order; | ||
}; | ||
|
||
bool get_combine_scales_and_zp() const { | ||
return m_combine_scales_and_zp; | ||
}; | ||
|
||
static std::vector<ov::PartialShape> shape_infer(const DynamicQuantize* op, | ||
const std::vector<ov::PartialShape>& input_shapes, | ||
const QuantizationConfig& config, | ||
const std::vector<uint64_t>& scales_zp_output_order, | ||
const bool combine_scales_and_zp = false); | ||
|
||
private: | ||
bool m_combine_scales_and_zp = false; | ||
std::vector<uint64_t> m_scales_zp_output_order; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace intel_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
src/plugins/intel_gpu/include/intel_gpu/op/read_values.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Copyright (C) 2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "intel_gpu/op/read_value.hpp" | ||
|
||
namespace ov { | ||
namespace intel_gpu { | ||
namespace op { | ||
|
||
/// \brief This operation handles the OpenVINO GPU Plugin's custom variable representation (which can store multiple states in a single variable) at the graph level. | ||
class ReadValues : public ReadValue { | ||
public: | ||
OPENVINO_OP("ReadValues", "gpu_opset"); | ||
|
||
ReadValues() = default; | ||
|
||
ReadValues(const std::shared_ptr<ov::op::util::Variable>& variable, | ||
const std::vector<ov::op::util::VariableInfo>& internal_states_infos); | ||
|
||
ReadValues(const OutputVector& variable_initializers, | ||
const std::shared_ptr<ov::op::util::Variable>& variable, | ||
const std::vector<ov::op::util::VariableInfo>& internal_states_infos); | ||
|
||
bool visit_attributes(ov::AttributeVisitor& visitor) override; | ||
|
||
void validate_and_infer_types() override; | ||
|
||
std::vector<ov::op::util::VariableInfo> get_all_internal_states_info() const; | ||
|
||
std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override; | ||
|
||
private: | ||
std::vector<ov::op::util::VariableInfo> m_internal_states_infos; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace intel_gpu | ||
} // namespace ov |
Oops, something went wrong.