NPUW: Fix for dynamic dispatch for AVX2 code
eshiryae committed Oct 16, 2024
1 parent 6eca67c commit 7060669
Showing 9 changed files with 1,646 additions and 1,614 deletions.
src/plugins/intel_npu/src/plugin/CMakeLists.txt (2 changes: 1 addition & 1 deletion)
@@ -70,7 +70,7 @@ cross_compiled_file(${TARGET_NAME}
     ARCH AVX2 ANY
         npuw/util_xarch.cpp
     API npuw/util_xarch.hpp
-    NAME unpack unpack_scale unpack_scale_zp to_f16
+    NAME unpack_i4i8 unpack_u4i8 unpack_i4f16 unpack_i4f16_scale unpack_i4f16_z unpack_u4f16 unpack_u4f16_scale_zp unpack_u4f16_asymm_zp unpack_u4f16_z unpack_u4f32 unpack_i8f16 unpack_i8f16_scale unpack_u8f16 to_f16
     NAMESPACE ov::npuw::util::XARCH
 )

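The NAME list drives cross_compiled_file's per-function dispatch stubs, so each entry has to be a unique symbol: the previous overloaded unpack / unpack_scale / unpack_scale_zp entries shared names across signatures, which this commit untangles by giving every type combination its own kernel name. For context, here is a rough, self-contained illustration of the runtime dispatch pattern such stubs implement; the function names and the feature check below are assumptions for the sketch, not the generated OpenVINO code.

    // Hypothetical illustration of runtime CPU-feature dispatch, the pattern
    // behind the ARCH AVX2 ANY stubs. Not the generated OpenVINO code.
    #include <cstdio>

    namespace demo {

    void unpack_i4f16_avx2() { std::puts("AVX2 path"); }     // built with -mavx2
    void unpack_i4f16_any()  { std::puts("generic path"); }  // portable fallback

    // The public entry point picks an implementation based on what the host
    // CPU actually supports; presumably one stub of this shape exists per NAME.
    void unpack_i4f16() {
    #if defined(__GNUC__) || defined(__clang__)
        if (__builtin_cpu_supports("avx2")) {
            unpack_i4f16_avx2();
            return;
        }
    #endif
        unpack_i4f16_any();
    }

    }  // namespace demo

    int main() {
        demo::unpack_i4f16();  // prints "AVX2 path" on an AVX2-capable host
    }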
@@ -18,7 +18,6 @@
 #include "openvino/runtime/iasync_infer_request.hpp"
 #include "plugin.hpp"
 #include "util.hpp"
-#include "util_xarch.hpp"
 #include "weights_bank.hpp"

 ov::npuw::MemAccessSim::MemAccessSim(const std::shared_ptr<ov::npuw::CompiledModel>& compiled_model) {
@@ -777,18 +776,18 @@ void ov::npuw::JustInferRequest::unpack_closure(std::size_t idx, RqPtr request)

         if (!comp_model_desc.scales.empty() && comp_model_desc.scales[cidx] && comp_model_desc.zerops[cidx]) {
             // Unpacking this weight requires scaling with zero points...
-            ov::npuw::util::XARCH::unpack_scale_zp(ov::get_tensor_impl(closure),
-                                                   ov::get_tensor_impl(comp_model_desc.zerops[cidx]),
-                                                   ov::get_tensor_impl(comp_model_desc.scales[cidx]),
-                                                   clparam);
+            ov::npuw::util::unpack(ov::get_tensor_impl(closure),
+                                   ov::get_tensor_impl(comp_model_desc.zerops[cidx]),
+                                   ov::get_tensor_impl(comp_model_desc.scales[cidx]),
+                                   clparam);
         } else if (!comp_model_desc.scales.empty() && comp_model_desc.scales[cidx]) {
             // Unpacking this weight requires scaling
-            ov::npuw::util::XARCH::unpack_scale(ov::get_tensor_impl(closure),
-                                                ov::get_tensor_impl(comp_model_desc.scales[cidx]),
-                                                clparam);
+            ov::npuw::util::unpack(ov::get_tensor_impl(closure),
+                                   ov::get_tensor_impl(comp_model_desc.scales[cidx]),
+                                   clparam);
         } else {
             // Unpacking this weight doesn't require scaling
-            ov::npuw::util::XARCH::unpack(ov::get_tensor_impl(closure), clparam);
+            ov::npuw::util::unpack(ov::get_tensor_impl(closure), clparam);
         }
     }
 }
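With util_xarch.hpp no longer included here, the call sites go through plain ov::npuw::util entry points and distinguish the three unpack flavors purely by argument count, with the destination tensor last. A hedged sketch of what the overload set in util.hpp plausibly declares; the parameter names are illustrative, and only the entry points and arities are taken from this diff.

    namespace ov::npuw::util {

    // Plain repacking: from -> to (e.g. i4 -> i8)
    void unpack(const ov::SoPtr<ov::ITensor>& from,
                const ov::SoPtr<ov::ITensor>& to);

    // Repacking with scales
    void unpack(const ov::SoPtr<ov::ITensor>& from,
                const ov::SoPtr<ov::ITensor>& scale,
                const ov::SoPtr<ov::ITensor>& to);

    // Repacking with zero points and scales
    void unpack(const ov::SoPtr<ov::ITensor>& from,
                const ov::SoPtr<ov::ITensor>& zerop,
                const ov::SoPtr<ov::ITensor>& scale,
                const ov::SoPtr<ov::ITensor>& to);

    }  // namespace ov::npuw::util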
src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp (6 changes: 3 additions & 3 deletions)
@@ -207,9 +207,9 @@ ov::Tensor LazyTensorImpl::eval() const {
         const auto& ts = cs.get_orig_tensor();
         ov::Tensor dst(type, shape);
         if (tw && tz && ts) {
-            ov::npuw::util::XARCH::unpack_scale_zp(gti(tw), gti(tz), gti(ts), gti(dst));
+            ov::npuw::util::unpack(gti(tw), gti(tz), gti(ts), gti(dst));
         } else if (tw && ts) {
-            ov::npuw::util::XARCH::unpack_scale(gti(tw), gti(ts), gti(dst));
+            ov::npuw::util::unpack(gti(tw), gti(ts), gti(dst));
         } else {
             NPUW_ASSERT(false && "Unsupported combination");
         }
@@ -224,7 +224,7 @@ ov::Tensor LazyTensorImpl::eval() const {
     case TransformType::PERMUTE:
         return ov::npuw::util::permute(m_parent->eval(), std::get<std::vector<std::size_t>>(m_transform.second));
     case TransformType::CONVERT:
-        return ov::npuw::util::XARCH::to_f16(m_parent->eval());
+        return ov::npuw::util::to_f16(m_parent->eval());
     default:
         NPUW_ASSERT(false);
     }
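Inside util.cpp (not part of this view), each wrapper presumably inspects the tensors' element types and forwards to the matching uniquely-named XARCH kernel, which is where the fixed AVX2/ANY dispatch actually fires. A minimal sketch of that selection for the two-argument form, assuming the wrapper keys on source and destination types; only the XARCH kernel names come from the CMake change above.

    // Hypothetical selection logic; the real util.cpp may branch differently.
    // XARCH::unpack_* are the dispatched stubs registered in CMakeLists.txt.
    void ov::npuw::util::unpack(const ov::SoPtr<ov::ITensor>& from,
                                const ov::SoPtr<ov::ITensor>& to) {
        const auto type_from = from->get_element_type();
        const auto type_to = to->get_element_type();
        if (type_from == ov::element::i4 && type_to == ov::element::i8) {
            XARCH::unpack_i4i8(from, to);
        } else if (type_from == ov::element::u4 && type_to == ov::element::i8) {
            XARCH::unpack_u4i8(from, to);
        } else if (type_from == ov::element::i4 && type_to == ov::element::f16) {
            XARCH::unpack_i4f16(from, to);
        } else {
            NPUW_ASSERT(false && "Unsupported combination");
        }
    }

The same shape would apply to ov::npuw::util::to_f16, which would simply forward to XARCH::to_f16.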
src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp (1 change: 0 additions & 1 deletion)
@@ -14,7 +14,6 @@
 #include "openvino/runtime/make_tensor.hpp"
 #include "openvino/runtime/tensor.hpp"
 #include "util.hpp"
-#include "util_xarch.hpp"

 namespace ov {
 namespace npuw {
(Diffs for the remaining five changed files are not shown in this view.)
