forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
70 changed files
with
3,242 additions
and
319 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
src/plugins/intel_gpu/include/intel_gpu/op/dynamic_quantize.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Copyright (C) 2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/op/op.hpp" | ||
#include "ov_ops/dynamic_quantize.hpp" | ||
|
||
namespace ov { | ||
namespace intel_gpu { | ||
namespace op { | ||
|
||
/// \brief GPU plugin variant of the DynamicQuantize operation. | ||
/// | ||
/// Derives from ov::op::internal::DynamicQuantize and adds two members: an output | ||
/// order for scales and zero points, and a flag for combining them. Both are | ||
/// readable through the getters declared below. | ||
class DynamicQuantize : public ov::op::internal::DynamicQuantize { | ||
public: | ||
// Op type name and opset identifier handed to the OPENVINO_OP macro. | ||
OPENVINO_OP("DynamicQuantize", "gpu_opset"); | ||
|
||
// Shorthand for the configuration type declared in ov::op::internal. | ||
using QuantizationConfig = ov::op::internal::QuantizationConfig; | ||
|
||
/// \brief Default constructor; m_combine_scales_and_zp stays false and the order vector stays empty. | ||
DynamicQuantize() = default; | ||
/// \brief Constructs a DynamicQuantize operation. | ||
/// | ||
/// \param data Input tensor with data | ||
/// \param config Dynamic quantization configuration | ||
/// \param scales_zp_output_order Output order for scales and zero points; defaults to an empty vector | ||
/// \param combine_scales_and_zp If true, combines scales and zero points into a single buffer, pairing each scale with its corresponding zero point; defaults to false | ||
DynamicQuantize(const Output<Node>& data, | ||
const QuantizationConfig& config, | ||
const std::vector<uint64_t>& scales_zp_output_order = {}, | ||
const bool combine_scales_and_zp = false); | ||
|
||
/// \brief Override of the base-class validation / type-inference hook; defined out of line. | ||
void validate_and_infer_types() override; | ||
|
||
/// \brief Override that produces a node from the replacement inputs in new_args; defined out of line. | ||
std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override; | ||
|
||
/// \brief Returns a const reference to the stored scales/zero-points output order. | ||
const std::vector<uint64_t>& get_scales_zp_output_order() const { | ||
return m_scales_zp_output_order; | ||
} | ||
|
||
/// \brief Returns the stored combine-scales-and-zero-points flag. | ||
bool get_combine_scales_and_zp() const { | ||
return m_combine_scales_and_zp; | ||
} | ||
|
||
/// \brief Static shape-inference entry point; defined out of line. | ||
/// | ||
/// Takes the op pointer, the input shapes and the same configuration values the | ||
/// constructor accepts, and returns a vector of partial shapes. Unlike the | ||
/// constructor, scales_zp_output_order has no default here. | ||
/// NOTE(review): how many shapes are returned, and what each one means, is not | ||
/// visible in this header - confirm against the implementation. | ||
static std::vector<ov::PartialShape> shape_infer(const DynamicQuantize* op, | ||
const std::vector<ov::PartialShape>& input_shapes, | ||
const QuantizationConfig& config, | ||
const std::vector<uint64_t>& scales_zp_output_order, | ||
const bool combine_scales_and_zp = false); | ||
|
||
private: | ||
// Value returned by get_combine_scales_and_zp(); false unless changed. | ||
bool m_combine_scales_and_zp = false; | ||
// Value returned by get_scales_zp_output_order(). | ||
std::vector<uint64_t> m_scales_zp_output_order; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace intel_gpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
src/plugins/intel_gpu/include/intel_gpu/op/read_values.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Copyright (C) 2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "intel_gpu/op/read_value.hpp" | ||
|
||
namespace ov { | ||
namespace intel_gpu { | ||
namespace op { | ||
|
||
/// \brief This operation handles the OpenVINO GPU Plugin's custom variable | ||
/// representation (which can store multiple states in a single variable) at the graph level. | ||
/// | ||
/// Derives from ReadValue (this header includes intel_gpu/op/read_value.hpp) and | ||
/// additionally holds a vector of ov::op::util::VariableInfo entries. | ||
class ReadValues : public ReadValue { | ||
public: | ||
// Op type name and opset identifier handed to the OPENVINO_OP macro. | ||
OPENVINO_OP("ReadValues", "gpu_opset"); | ||
|
||
/// \brief Default constructor. | ||
ReadValues() = default; | ||
|
||
/// \brief Constructs the operation from a variable and the infos of its internal states. | ||
/// | ||
/// \param variable Shared pointer to the variable | ||
/// \param internal_states_infos One VariableInfo per internal state (presumably kept in m_internal_states_infos - confirm in the implementation) | ||
ReadValues(const std::shared_ptr<ov::op::util::Variable>& variable, | ||
const std::vector<ov::op::util::VariableInfo>& internal_states_infos); | ||
|
||
/// \brief Same as the constructor above, with an additional vector of initializer outputs. | ||
/// | ||
/// \param variable_initializers Outputs passed as initializers (presumably the initial values of the internal states - confirm in the implementation) | ||
/// \param variable Shared pointer to the variable | ||
/// \param internal_states_infos One VariableInfo per internal state | ||
ReadValues(const OutputVector& variable_initializers, | ||
const std::shared_ptr<ov::op::util::Variable>& variable, | ||
const std::vector<ov::op::util::VariableInfo>& internal_states_infos); | ||
|
||
/// \brief Override of the attribute-visiting hook; defined out of line. | ||
bool visit_attributes(ov::AttributeVisitor& visitor) override; | ||
|
||
/// \brief Override of the base-class validation / type-inference hook; defined out of line. | ||
void validate_and_infer_types() override; | ||
|
||
/// \brief Returns the infos of all internal states by value; defined out of line | ||
/// (presumably a copy of m_internal_states_infos - confirm in the implementation). | ||
std::vector<ov::op::util::VariableInfo> get_all_internal_states_info() const; | ||
|
||
/// \brief Override that produces a node from the replacement inputs in new_args; defined out of line. | ||
std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override; | ||
|
||
private: | ||
// Infos describing the internal states held by this op's variable. | ||
std::vector<ov::op::util::VariableInfo> m_internal_states_infos; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace intel_gpu | ||
} // namespace ov |
Oops, something went wrong.