Skip to content

Commit

Permalink
fix code style and warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
alvoron committed Dec 12, 2024
1 parent aeca18e commit 44e04cf
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 236 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

#include "cpu_types.h"
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "nodes/executors/memory_arguments.hpp"
#include "nodes/executors/common/common_utils.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "openvino/core/type/element_type.hpp"
#include "utils/cpu_utils.hpp"
#include "utils/debug_capabilities.h"
Expand Down
233 changes: 0 additions & 233 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,239 +24,6 @@
namespace ov {
namespace intel_cpu {

// Builds static input dims for a (possibly dynamic) input shape by pinning the
// innermost (channel) dimension to the weights' innermost dimension and letting
// MemoryDescUtils::makeDummyShape choose values for the remaining dynamic dims.
static VectorDims makeDummyInputDims(const Shape& inShape, const Shape& wShape) {
    auto lowerDims = inShape.getMinDims();
    auto upperDims = inShape.getMaxDims();

    const auto& staticWeightDims = wShape.getStaticDims();
    lowerDims.back() = staticWeightDims.back();
    upperDims.back() = staticWeightDims.back();

    const Shape pinnedShape(lowerDims, upperDims);
    return MemoryDescUtils::makeDummyShape(pinnedShape).getStaticDims();
}

// Derives static output dims for FullyConnected from (dummy) input dims and
// weight dims.  Layout convention:
//     activation  weight   output_shape
//     NCHW        CoCHW    NCo
//     TNC         CoC      TNCo
//     NC          CoC      NCo
static VectorDims makeDummyOutputDims(const VectorDims& inShape, const VectorDims& wShape, const size_t out_rank) {
    VectorDims result(out_rank, 1);

    // Co (output channels) always comes from the first weight dimension.
    result.back() = wShape[0];

    // Activation dims not consumed by the weights' reduction dims are batch
    // dims and are copied through to the output.
    const size_t reducedRank = wShape.size() - 1;
    const size_t batchDims = inShape.size() - reducedRank;
    const size_t firstBatchPos = out_rank - batchDims - 1;
    for (size_t b = 0; b < batchDims; ++b) {
        result[firstBatchPos + b] = inShape[b];
    }

    return result;
}

// Builds a descriptor that reinterprets srcDesc's 2D weights as transposed
// (format tag "ba") and reshapes the result to dstDesc's dims, so the backend
// can consume the weight data without a physical transpose.
// FIX: parameters are taken by const reference instead of const value —
// passing shared_ptr by value incurred an atomic refcount inc/dec per call
// with no ownership transfer (call sites are unaffected).
static DnnlMemoryDescPtr makeTransposedWeightDescriptor(const DnnlMemoryDescPtr& srcDesc,
                                                        const DnnlMemoryDescPtr& dstDesc) {
    const auto& weiDesc = srcDesc->getDnnlDesc();
    // View the weights column-major ("ba") ...
    const auto reorderedWeiDesc =
        dnnl::memory::desc{weiDesc.get_dims(), weiDesc.get_data_type(), dnnl::memory::format_tag::ba};
    // ... then reshape to the destination dims to obtain the transposed view.
    const auto transposedWeiDesc = reorderedWeiDesc.reshape(dstDesc->getDnnlDesc().get_dims());

    return DnnlExtensionUtils::makeDescriptor(transposedWeiDesc);
}

// Converts FC weights to weightPrecision.
// Preferred path: the ACL weights converter writes directly into `output`.
// Fallback path: cpu_convert into a temporary buffer wrapped in a new Memory.
// Returns an empty optional when neither path supports the given descriptors.
static ov::optional<MemoryPtr> convertWeightPrecision(MemoryPtr input, MemoryPtr output, ov::element::Type weightPrecision) {
MemoryArgs memoryArgs;
memoryArgs[ARG_SRC] = input;
memoryArgs[ARG_DST] = output;

// Try ACL first; update() doubles as a capability check.
auto aclWeightsConverter = std::make_shared<acl_fc_executor::ACLWeightsConverter>();
if (aclWeightsConverter->update(memoryArgs)) {
aclWeightsConverter->execute(memoryArgs);
return ov::optional<MemoryPtr>(memoryArgs.at(ARG_DST));
}

// The cpu_convert fallback requires both descriptors to be supported by the
// common Convert implementation.
if (!node::Convert::isSupportedDesc(input->getDesc()) ||
!node::Convert::isSupportedDesc(output->getDesc())) {
return {};
}

auto data = static_cast<const uint8_t *>(input->getData());
std::vector<uint8_t> tmpBuff;
tmpBuff.resize(output->getSize());
// Element count = source byte size / source element size.
cpu_convert(data, tmpBuff.data(), DnnlExtensionUtils::DataTypeToElementType(input->getDataType()),
weightPrecision, input->getSize() / input->getDesc().getPrecision().size());

// NOTE(review): tmpBuff is a function-local buffer handed to the Memory
// constructor; this assumes that constructor copies the data (or otherwise
// takes ownership) before tmpBuff is destroyed on return — TODO confirm
// against the Memory(engine, desc, data) contract.
return ov::optional<MemoryPtr>(std::make_shared<Memory>(output->getPrimitive().get_engine(),
output->getDesc().cloneWithNewPrecision(weightPrecision),
tmpBuff.data()));
}

// Two-step fallback used when a direct reorder would hit the slow reference
// implementation: first convert the weights to the input's precision (so only
// the layout differs), then run an optimized layout-only oneDNN reorder into
// `output`.  Returns an empty optional when the precisions already match, no
// optimized layout-only reorder exists, or the precision conversion fails.
static ov::optional<MemoryPtr> reorderDataFallback(MemoryPtr input, MemoryPtr output, ExecutorContext::CPtr context) {
    // Same data type on both sides: a precision conversion cannot help.
    if (output->getDataType() == input->getDataType()) {
        return {};
    }
    const auto inPrc = DnnlExtensionUtils::DataTypeToElementType(input->getDataType());
    auto convertedDstMemoryDesc = output->getDesc().cloneWithNewPrecision(inPrc);
    // Query a layout-only reorder (destination re-typed to the input precision).
    dnnl::reorder reorderWithoutConvert = getReorderPrim(context->getRuntimeCache(),
                                                         output->getPrimitive().get_engine(),
                                                         input->getPrimitive().get_desc(),
                                                         MemoryDescUtils::convertToDnnlMemoryDesc(convertedDstMemoryDesc)->getDnnlDesc());

    // Only worthwhile when the layout-only reorder is backed by an optimized
    // (non-reference) implementation.
    if (reorderWithoutConvert && parse_impl_name(reorderWithoutConvert.get_primitive_desc()->impl()->name()) != ref_any) {
        auto convertOutput = convertWeightPrecision(input, output, inPrc);
        if (!convertOutput) {
            return {};
        }
        input = *convertOutput;

        // FIX: the original re-tested `reorderWithoutConvert` here, which is
        // necessarily true inside this branch; the dead check is removed.
        dnnl::stream loc_stream(output->getPrimitive().get_engine(), dnnl::stream::flags::in_order);
        reorderWithoutConvert.execute(loc_stream, {{DNNL_ARG_FROM, input->getPrimitive()}, {DNNL_ARG_TO, output->getPrimitive()}});
        return ov::optional<MemoryPtr>(output);
    }
    return {};
}

// Reorders weight data from srcWeightDesc layout into freshly allocated memory
// with dstWeightDesc layout.  Strategy, in order:
//   1. compatible descriptors        -> plain memcpy;
//   2. optimized direct reorder      -> execute it (chosen at the end);
//   3. reference-only direct reorder -> try the precision-conversion fallback;
//   4. last resort                   -> execute the reference direct reorder.
// Throws when the shapes are dynamic or no reorder can be created at all.
static MemoryPtr reorderData(DnnlMemoryDescPtr srcWeightDesc,
DnnlMemoryDescPtr dstWeightDesc,
MemoryCPtr weightsMem,
ExecutorContext::CPtr context) {
// `input` wraps the caller's weight data; `output` owns a new buffer.
MemoryPtr input = std::make_shared<Memory>(context->getEngine(), srcWeightDesc, weightsMem->getData());
MemoryPtr output = std::make_shared<Memory>(context->getEngine(), dstWeightDesc);
if (!input->getDesc().isDefined() || !output->getDesc().isDefined())
OPENVINO_THROW("Can't reorder data with dynamic shapes");

// Empty tensors: nothing to move.
if (input->getShape().hasZeroDims() || output->getShape().hasZeroDims()) {
return output;
}

// Identical layouts: a raw byte copy is sufficient.
if (input->getDesc().isCompatible(output->getDesc())) {
auto srcPtr = static_cast<uint8_t*>(input->getData());
auto dstPtr = static_cast<uint8_t*>(output->getData());
auto copySize = output->getSize();
cpu_memcpy(dstPtr, srcPtr, copySize);
return output;
}

// try directly reorder
auto engine = output->getPrimitive().get_engine();
dnnl::reorder directReorder = getReorderPrim(context->getRuntimeCache(),
engine,
input->getPrimitive().get_desc(),
output->getPrimitive().get_desc());

// Direct reorder missing or reference-slow: attempt the conversion fallback.
if (!directReorder || parse_impl_name(directReorder.get_primitive_desc()->impl()->name()) == ref_any) {
// try precision conversion then do the reorder
auto fallbackOutput = reorderDataFallback(input, output, context);
if (fallbackOutput) {
return *fallbackOutput;
}
}
// if precision conversion does not work then do direct reference reorder
if (directReorder) {
dnnl::stream loc_stream(engine, dnnl::stream::flags::in_order);
directReorder.execute(loc_stream, {{DNNL_ARG_FROM, input->getPrimitive()}, {DNNL_ARG_TO, output->getPrimitive()}});
} else {
OPENVINO_THROW("Could not make onednn reorder.");
}
return output;
}

// Returns FC weights memory ready for execution, repacking/converting it when
// the attrs request it.  When the executor context provides a weights cache,
// the (possibly repacked) memory is shared through the cache, keyed by the
// weight dims, byte size and source data pointer; otherwise packing happens
// unconditionally on every call.
static MemoryPtr reorderWeights(const MemoryArgs &memory,
                                const ExecutorContext::CPtr context,
                                ACLFCAttrs& aclfcAttrs,
                                DnnlMemoryDescPtr dnnlSrcDesc,
                                DnnlMemoryDescPtr dnnlDstDesc) {
    // Performs the actual packing; also used as the cache-miss factory below.
    auto packWeights = [&]() {
        MemoryPtr packed = memory.at(ARG_WEI);
        const bool needsProcessing = aclfcAttrs.isWeightsRepacked || aclfcAttrs.isConvertedWeights;
        if (needsProcessing) {
            packed = reorderData(dnnlSrcDesc, dnnlDstDesc, memory.at(ARG_WEI), context);
            DEBUG_LOG("ACLFullyConnectedExecutor: cache miss, perform packing");
        }
        return packed;
    };

    auto weightCache = context->getWeightsCache();
    if (weightCache == nullptr) {
        DEBUG_LOG("ACLFullyConnectedExecutor: Weights cache is not available");
        return packWeights();
    }

    const auto& wgtDims = memory.at(ARG_WEI)->getStaticDims();
    const auto N = wgtDims[0];
    const auto K = wgtDims[1];
    std::string format = "fc_acl_" + std::to_string(N) + "_" + std::to_string(K);
    const std::string string_hash = format + "_" + std::to_string(memory.at(ARG_WEI)->getSize()) + "_" +
                                    std::to_string(reinterpret_cast<uint64_t>(memory.at(ARG_WEI)->getData()));
    DEBUG_LOG("ACLFullyConnectedExecutor: findOrCreate, string_hash: ", string_hash);
    return *weightCache->findOrCreate(string_hash, packWeights);
}

// Prepares FC weights memory for the ACL executor:
//  * normalizes the weights descriptor to 2D,
//  * builds dummy static src/dst memories when the input shape is dynamic,
//  * queries ACL (ACLWeightFormatGenerator) for the optimal weight format,
//  * repacks and/or exposes a transposed weight view accordingly, going
//    through the shared weights cache via reorderWeights().
// Outputs: expectedWeightFormat and weiTensorInfo are filled for the caller;
// aclfcAttrs.isWeightsRepacked may be set as a side effect.
// FIX: renamed local `inDymmyDims`/`outDymmyDims` to the intended
// `inDummyDims`/`outDummyDims` (typo); logic is unchanged.
static MemoryPtr prepareWeightMemory(const MemoryArgs &memory,
                                     const ExecutorContext::CPtr context,
                                     const FCAttrs &attrs,
                                     ACLFCAttrs& aclfcAttrs,
                                     const PostOps &postOps,
                                     arm_compute::WeightFormat& expectedWeightFormat,
                                     arm_compute::TensorInfo& weiTensorInfo) {
    MemoryArgs memoryArgs;
    memoryArgs[ARG_BIAS] = memory.at(ARG_BIAS);
    memoryArgs[ARG_WEI] = memory.at(ARG_WEI);

    auto originalWeightsDesc = memory.at(ARG_WEI)->getDescPtr();

    // normalize weights to 2D
    const auto& wgtDims = originalWeightsDesc->getShape().getStaticDims();
    const VectorDims wgtDims2D = reshapeDownToRank<2>(wgtDims);

    originalWeightsDesc = std::make_shared<CpuBlockedMemoryDesc>(originalWeightsDesc->getPrecision(), Shape{wgtDims2D});

    auto dnnlSrcDesc = MemoryDescUtils::convertToDnnlMemoryDesc(originalWeightsDesc);
    auto dstDesc = originalWeightsDesc->cloneWithNewPrecision(aclfcAttrs.inputPrecision);
    auto dnnlDstDesc = MemoryDescUtils::convertToDnnlMemoryDesc(dstDesc);

    if (memory.at(ARG_SRC_0)->getShape().isDynamic()) {
        // Dynamic input: substitute dummy static src/dst memories so the
        // weight-format query below can run before the real shapes are known.
        const auto& inShape = memory.at(ARG_SRC_0)->getShape();
        const auto& wShape = originalWeightsDesc->getShape();
        const auto& inDummyDims = makeDummyInputDims(inShape, wShape);
        const auto& outDummyDims = makeDummyOutputDims(inDummyDims, wShape.getStaticDims(), memory.at(ARG_DST)->getShape().getRank());
        memoryArgs[ARG_SRC_0] = std::make_shared<Memory>(context->getEngine(),
                                                         memory.at(ARG_SRC_0)->getDescPtr()->cloneWithNewDims(inDummyDims));
        memoryArgs[ARG_DST] = std::make_shared<Memory>(context->getEngine(),
                                                       memory.at(ARG_DST)->getDescPtr()->cloneWithNewDims(outDummyDims));
    } else {
        memoryArgs[ARG_SRC_0] = memory.at(ARG_SRC_0);
        memoryArgs[ARG_DST] = memory.at(ARG_DST);
    }

    // TODO: ACLWeightFormatGenerator should be replaced with Reorder executor
    // that calls ACL NEReorder + NETranspose or dnnl::reorder depending on backend availability
    auto aclWeightsRepack = std::make_shared<acl_fc_executor::ACLWeightFormatGenerator>(attrs, postOps, memoryArgs);
    bool isNeededReorder = aclWeightsRepack->update(memoryArgs);
    expectedWeightFormat = isNeededReorder ? aclWeightsRepack->getOptImplWeightFormat() : arm_compute::WeightFormat::UNSPECIFIED;
    weiTensorInfo = aclWeightsRepack->getTensorInfo(ACLArgs::ACL_WEI);

    if (isNeededReorder) {
        // Rewrite the destination oneDNN descriptor in-place to the weight
        // format ACL selected for its optimal implementation.
        dnnl::impl::dim_t o_dim = 0;
        dnnl::impl::dim_t inner_dim = 1;
        std::vector<dnnl::impl::dim_t> remaining_dims = {};
        auto weights_md_ = dnnlDstDesc->getDnnlDesc().get();
        dnnl::impl::cpu::acl::acl_utils::reorder_to_weight_format(weiTensorInfo, *weights_md_, expectedWeightFormat,
                                                                  inner_dim, o_dim, remaining_dims, {});
        if (aclfcAttrs.weightsNonTransposed) {
            dnnlSrcDesc = makeTransposedWeightDescriptor(dnnlSrcDesc, dnnlDstDesc);
        }
        aclfcAttrs.isWeightsRepacked = true;
        return reorderWeights(memory, context, aclfcAttrs, dnnlSrcDesc, dnnlDstDesc);
    }
    if (!aclfcAttrs.weightsNonTransposed) {
        // No repack needed, but the weights are stored transposed: expose a
        // transposed view of the destination descriptor instead.
        dnnlDstDesc = makeTransposedWeightDescriptor(dnnlDstDesc, dnnlSrcDesc);
        aclfcAttrs.isWeightsRepacked = true;
    }
    return reorderWeights(memory, context, aclfcAttrs, dnnlSrcDesc, dnnlDstDesc);
}

static bool checkPostOps(const PostOps &postOps) {
if (postOps.empty()) {
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ static std::vector<float> getDeQuantizedScales(const MemoryArgs& memory) {
return DQScales;
}

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov

0 comments on commit 44e04cf

Please sign in to comment.