diff --git a/paddle/fluid/operators/detection/box_coder_op_npu.cc b/paddle/fluid/operators/detection/box_coder_op_npu.cc deleted file mode 100644 index 4170088faff18..0000000000000 --- a/paddle/fluid/operators/detection/box_coder_op_npu.cc +++ /dev/null @@ -1,448 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/impl/box_coder.h" - -namespace paddle { -namespace operators { - -template -struct BoxCoderFunction { - public: - explicit BoxCoderFunction(const framework::ExecutionContext& ctx) : ctx(ctx) { - place = ctx.GetPlace(); - stream = ctx.template device_context() - .stream(); - } - phi::DenseTensor Adds(const phi::DenseTensor& x, float scalar) { - phi::DenseTensor y; - y.mutable_data(x.dims(), place); - const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}}); - runner.Run(stream); - return y; - } - phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) { - phi::DenseTensor y; - y.mutable_data(x.dims(), place); - const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}}); - runner.Run(stream); - return y; - } - phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { - phi::DenseTensor z; - z.mutable_data(x.dims(), place); - const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {}); - runner.Run(stream); - return z; - } - phi::DenseTensor SubWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - phi::DenseTensor z; - z.mutable_data(shape, place); - const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {}); - runner.Run(stream); - return z; - } - void DivWithBroadCastVoid(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape, - phi::DenseTensor* z) { - z->mutable_data(shape, place); - const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {}); - runner.Run(stream); - } - phi::DenseTensor DivWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - phi::DenseTensor z; - DivWithBroadCastVoid(x, y, shape, &z); - return z; - } - void MulWithBroadCastVoid(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape, - phi::DenseTensor* z) { - z->mutable_data(shape, place); - const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {}); - runner.Run(stream); - } - phi::DenseTensor MulWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - phi::DenseTensor z; - MulWithBroadCastVoid(x, y, shape, &z); - return z; - } - void AddWithBroadCastVoid(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape, - phi::DenseTensor* z) { - z->mutable_data(shape, place); - const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {}); - runner.Run(stream); - } - phi::DenseTensor AddWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - phi::DenseTensor z; - AddWithBroadCastVoid(x, y, shape, &z); - return z; - } - phi::DenseTensor Abs(const phi::DenseTensor& x) { - phi::DenseTensor y; - y.mutable_data(x.dims(), place); - const auto& runner = NpuOpRunner("Abs", {x}, {y}, {}); - runner.Run(stream); - return y; - } - phi::DenseTensor Log(const phi::DenseTensor& x) { - phi::DenseTensor t_x_m1 = Adds(x, -1); - phi::DenseTensor y; - y.mutable_data(x.dims(), place); - const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {}); - runner.Run(stream); - return y; - } - phi::DenseTensor Exp(const phi::DenseTensor& x) { - phi::DenseTensor y; - y.mutable_data(x.dims(), place); - const auto& runner = NpuOpRunner("Exp", {x}, {y}, {}); - runner.Run(stream); - return y; - } - phi::DenseTensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) { - auto dim_x = x.dims(); - auto dim_y = y.dims(); - PADDLE_ENFORCE_EQ( - dim_x.size(), - 2, - platform::errors::InvalidArgument( - "x should be a 2-dim tensor, but got %d-dim.", dim_x.size())); - PADDLE_ENFORCE_EQ( - dim_y.size(), - 2, - platform::errors::InvalidArgument( - "y should be a 2-dim tensor, but got %d-dim.", dim_y.size())); - PADDLE_ENFORCE_EQ( - dim_x[1], - dim_y[0], - platform::errors::InvalidArgument("Expect dim_x[1] == dim_y[0], but " - "got dim_x[1] = %d, dim_y[0] = %d.", - dim_x[1], - dim_y[0])); - phi::DenseTensor z; - z.mutable_data({dim_x[0], dim_y[1]}, place); - const auto& runner = - NpuOpRunner("MatMul", - {x, y}, - {z}, - {{"transpose_x1", false}, {"transpose_x2", false}}); - runner.Run(stream); - return z; - } - void ConcatVoid(const std::vector& inputs, - const framework::DDim& shape_out, - int axis, - phi::DenseTensor* output) { - output->mutable_data(shape_out, place); - std::vector names; - for (size_t i = 0; i < inputs.size(); i++) { - names.push_back("x" + std::to_string(i)); - } - NpuOpRunner runner{ - "ConcatD", - {inputs}, - {*output}, - {{"concat_dim", axis}, {"N", static_cast(inputs.size())}}}; - runner.AddInputNames(names); - runner.Run(stream); - } - phi::DenseTensor Concat(const std::vector& inputs, - const framework::DDim& shape_out, - int axis) { - phi::DenseTensor output; - ConcatVoid(inputs, shape_out, axis, &output); - return output; - } - phi::DenseTensor Slice(const phi::DenseTensor& x, - const std::vector& offsets, - const std::vector& size, - const framework::DDim& shape) { - phi::DenseTensor y; - y.mutable_data(shape, place); - const auto& runner = - NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}}); - runner.Run(stream); - return y; - } - - private: - platform::Place place; - aclrtStream stream; - const framework::ExecutionContext& ctx; -}; - -template -void Vector2Tensor(const framework::ExecutionContext& ctx, - const std::vector& vec, - const framework::DDim& ddim, - phi::DenseTensor* tsr) { - framework::TensorFromVector(vec, ctx.device_context(), tsr); - ctx.template device_context().Wait(); - tsr->Resize(ddim); -} - -template -void BoxCoderEnc(const framework::ExecutionContext& ctx, - const phi::DenseTensor* tb, - const phi::DenseTensor* pb, - const phi::DenseTensor* pbv, - const bool norm, - const std::vector& variance, - phi::DenseTensor* out) { - auto M = pb->dims()[0]; - auto N = tb->dims()[0]; - auto shape_0 = phi::make_ddim({4, 2}); - phi::DenseTensor m_diff; - phi::DenseTensor m_aver; - std::vector vec_diff = {static_cast(-1), - static_cast(0), - static_cast(0), - static_cast(-1), - static_cast(1), - static_cast(0), - static_cast(0), - static_cast(1)}; - std::vector vec_aver = {static_cast(0.5), - static_cast(0), - static_cast(0), - static_cast(0.5), - static_cast(0.5), - static_cast(0), - static_cast(0), - static_cast(0.5)}; - Vector2Tensor(ctx, vec_diff, shape_0, &m_diff); - Vector2Tensor(ctx, vec_aver, shape_0, &m_aver); - - BoxCoderFunction F(ctx); - phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); - phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); - phi::DenseTensor tb_xy = F.Dot(*tb, m_aver); - phi::DenseTensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1)); - - pb_xy.Resize({1, M, 2}); - pb_wh.Resize({1, M, 2}); - tb_xy.Resize({N, 1, 2}); - tb_wh.Resize({N, 1, 2}); - - auto shape_half = phi::make_ddim({N, M, 2}); - auto shape_full = phi::make_ddim({N, M, 4}); - - phi::DenseTensor out_xy_0 = F.DivWithBroadCast( - F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half); - phi::DenseTensor out_wh_0 = - F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half))); - phi::DenseTensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2); - - if (pbv) { - F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out); - } else { - phi::DenseTensor t_var; - std::vector vec_var(4); - for (auto i = 0; i < 4; i++) { - vec_var[i] = static_cast(variance[i]); - } - Vector2Tensor(ctx, vec_var, phi::make_ddim({1, 1, 4}), &t_var); - F.DivWithBroadCastVoid(out_0, t_var, shape_full, out); - } -} - -template -void BoxCoderDec(const framework::ExecutionContext& ctx, - const phi::DenseTensor* tb, - const phi::DenseTensor* pb, - const phi::DenseTensor* pbv, - const bool norm, - const std::vector& variance, - int axis, - phi::DenseTensor* out) { - auto shape_0 = phi::make_ddim({4, 2}); - phi::DenseTensor m_diff; - phi::DenseTensor m_aver; - std::vector vec_diff = {static_cast(-1), - static_cast(0), - static_cast(0), - static_cast(-1), - static_cast(1), - static_cast(0), - static_cast(0), - static_cast(1)}; - std::vector vec_aver = {static_cast(0.5), - static_cast(0), - static_cast(0), - static_cast(0.5), - static_cast(0.5), - static_cast(0), - static_cast(0), - static_cast(0.5)}; - Vector2Tensor(ctx, vec_diff, shape_0, &m_diff); - Vector2Tensor(ctx, vec_aver, shape_0, &m_aver); - - BoxCoderFunction F(ctx); - phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); - phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); - auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2}) - : phi::make_ddim({pb->dims()[0], 1, 2}); - pb_xy.Resize(pb_resize_shape); - pb_wh.Resize(pb_resize_shape); - - auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2}); - std::vector tbox_slice_size = { - static_cast(tb->dims()[0]), static_cast(tb->dims()[1]), 2}; - phi::DenseTensor tbox01 = - F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape); - phi::DenseTensor tbox23 = - F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape); - - phi::DenseTensor tb_xy; - phi::DenseTensor tb_wh; - if (pbv) { - auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2}); - auto pbvt_resize_shape = axis == 0 ? phi::make_ddim({1, pbv->dims()[0], 2}) - : phi::make_ddim({pbv->dims()[0], 1, 2}); - std::vector pbvt_slice_size = {static_cast(pbv->dims()[0]), 2}; - phi::DenseTensor pbv_t01 = - F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape); - phi::DenseTensor pbv_t23 = - F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape); - pbv_t01.Resize(pbvt_resize_shape); - pbv_t23.Resize(pbvt_resize_shape); - - F.AddWithBroadCastVoid( - F.MulWithBroadCast(tbox01, F.Mul(pb_wh, pbv_t01), tbox_slice_shape), - pb_xy, - tbox_slice_shape, - &tb_xy); - F.MulWithBroadCastVoid( - F.Exp(F.MulWithBroadCast(pbv_t23, tbox23, tbox_slice_shape)), - pb_wh, - tbox_slice_shape, - &tb_wh); - } else if (variance.empty()) { - F.AddWithBroadCastVoid(F.MulWithBroadCast(tbox01, pb_wh, tbox_slice_shape), - pb_xy, - tbox_slice_shape, - &tb_xy); - F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh); - } else { - phi::DenseTensor t_var01, t_var23; - auto t_var_shape = phi::make_ddim({1, 1, 2}); - std::vector vec_var01 = {static_cast(variance[0]), - static_cast(variance[1])}; - std::vector vec_var23 = {static_cast(variance[2]), - static_cast(variance[3])}; - Vector2Tensor(ctx, vec_var01, t_var_shape, &t_var01); - Vector2Tensor(ctx, vec_var23, t_var_shape, &t_var23); - F.AddWithBroadCastVoid( - F.MulWithBroadCast(tbox01, - F.MulWithBroadCast(pb_wh, t_var01, pb_resize_shape), - tbox_slice_shape), - pb_xy, - tbox_slice_shape, - &tb_xy); - F.MulWithBroadCastVoid( - F.Exp(F.MulWithBroadCast(t_var23, tbox23, tbox_slice_shape)), - pb_wh, - tbox_slice_shape, - &tb_wh); - } - phi::DenseTensor obox01 = - F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape); - phi::DenseTensor obox23 = - F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape), - (norm ? 0 : -1)); - F.ConcatVoid({obox01, obox23}, out->dims(), 2, out); -} - -template -class BoxCoderNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* prior_box = ctx.Input("PriorBox"); - auto* prior_box_var = ctx.Input("PriorBoxVar"); - auto* target_box = ctx.Input("TargetBox"); - auto* output_box = ctx.Output("OutputBox"); - std::vector variance = ctx.Attr>("variance"); - const int axis = ctx.Attr("axis"); - - if (prior_box_var) { - PADDLE_ENFORCE_EQ(variance.empty(), - true, - platform::errors::InvalidArgument( - "Input 'PriorBoxVar' and attribute 'variance'" - " of BoxCoder operator should not be used at the " - "same time.")); - } - if (!(variance.empty())) { - PADDLE_ENFORCE_EQ(static_cast(variance.size()), - 4, - platform::errors::InvalidArgument( - "Size of attribute 'variance' in BoxCoder operator" - " should be 4. But received size is %d", - variance.size())); - } - - if (target_box->lod().size()) { - PADDLE_ENFORCE_EQ(target_box->lod().size(), - 1, - platform::errors::InvalidArgument( - "Input 'TargetBox' of BoxCoder operator only" - " supports LoD with one level.")); - } - - auto code_type = - phi::funcs::GetBoxCodeType(ctx.Attr("code_type")); - bool normalized = ctx.Attr("box_normalized"); - - if (code_type == phi::funcs::BoxCodeType::kEncodeCenterSize) { - BoxCoderEnc(ctx, - target_box, - prior_box, - prior_box_var, - normalized, - variance, - output_box); - } else { - BoxCoderDec(ctx, - target_box, - prior_box, - prior_box_var, - normalized, - variance, - axis, - output_box); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_NPU_KERNEL(box_coder, - ops::BoxCoderNPUKernel, - ops::BoxCoderNPUKernel); diff --git a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc deleted file mode 100644 index c9935e54d82ef..0000000000000 --- a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc +++ /dev/null @@ -1,396 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/density_prior_box_op.h" - -namespace paddle { -namespace operators { - -using fp16 = paddle::platform::float16; - -template -struct DensityPriorBoxFunction { - public: - explicit DensityPriorBoxFunction(const framework::ExecutionContext& ctx) - : ctx(ctx) { - place = ctx.GetPlace(); - stream = ctx.template device_context().stream(); - t0.mutable_data({1}, place); - t1.mutable_data({1}, place); - tn.mutable_data({1}, place); - FillNpuTensorWithConstant(&t0, static_cast(0)); - FillNpuTensorWithConstant(&t1, static_cast(1)); - } - void Arange(int n, phi::DenseTensor* x) { - // x should be init first - FillNpuTensorWithConstant(&tn, static_cast(n)); - const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {*x}, {}); - runner.Run(stream); - } - void Add(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // z should be init first - const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Cast(const phi::DenseTensor* x, phi::DenseTensor* y) { - auto dst_dtype = - ConvertToNpuDtype(framework::TransToProtoVarType(y->type())); - const auto& runner = NpuOpRunner( - "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); - runner.Run(stream); - } - void Sub(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // z should be init first - const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Mul(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { - // y should be init first - const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); - runner.Run(stream); - } - void Muls(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { - // y should be init first - const auto& runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scalar}}); - runner.Run(stream); - } - void Maximum(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Minimum(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Concat(const std::vector& inputs, - int axis, - phi::DenseTensor* output) { - // output should be init first - std::vector names; - for (size_t i = 0; i < inputs.size(); i++) { - names.push_back("x" + std::to_string(i)); - } - NpuOpRunner runner{ - "ConcatD", - {inputs}, - {*output}, - {{"concat_dim", axis}, {"N", static_cast(inputs.size())}}}; - runner.AddInputNames(names); - runner.Run(stream); - } - void Tile(const phi::DenseTensor* x, - phi::DenseTensor* y, - const std::vector& multiples) { - // y should be init first - if (x->dims() == y->dims()) { - framework::TensorCopy( - *x, - place, - ctx.template device_context(), - y); - return; - } - const auto& runner = - NpuOpRunner("TileD", {*x}, {*y}, {{"multiples", multiples}}); - runner.Run(stream); - } - void FloatVec2Tsr(const std::vector& vec, phi::DenseTensor* tsr_dst) { - // - framework::TensorFromVector(vec, ctx.device_context(), tsr_dst); - ctx.template device_context().Wait(); - } - - private: - platform::Place place; - aclrtStream stream; - const framework::ExecutionContext& ctx; - phi::DenseTensor t0; - phi::DenseTensor t1; - phi::DenseTensor tn; -}; - -template <> -void DensityPriorBoxFunction::Arange(int n, phi::DenseTensor* x) { - phi::DenseTensor x_fp32(phi::DataType::FLOAT32); - x_fp32.mutable_data(x->dims(), place); - FillNpuTensorWithConstant(&tn, static_cast(n)); - const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); - runner.Run(stream); - Cast(&x_fp32, x); -} - -template <> -void DensityPriorBoxFunction::FloatVec2Tsr(const std::vector& vec, - phi::DenseTensor* tsr_dst) { - phi::DenseTensor tsr_fp32(phi::DataType::FLOAT32); - tsr_fp32.mutable_data(tsr_dst->dims(), place); - framework::TensorFromVector(vec, ctx.device_context(), &tsr_fp32); - ctx.template device_context().Wait(); - Cast(&tsr_fp32, tsr_dst); -} - -template -class DensityPriorBoxOpNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); - - auto variances = ctx.Attr>("variances"); - auto clip = ctx.Attr("clip"); - - auto fixed_sizes = ctx.Attr>("fixed_sizes"); - auto fixed_ratios = ctx.Attr>("fixed_ratios"); - auto densities = ctx.Attr>("densities"); - - float step_w = ctx.Attr("step_w"); - float step_h = ctx.Attr("step_h"); - float offset = ctx.Attr("offset"); - - int image_w = image->dims()[3]; - int image_h = image->dims()[2]; - int layer_w = input->dims()[3]; - int layer_h = input->dims()[2]; - - auto _type = input->dtype(); - auto place = ctx.GetPlace(); - DensityPriorBoxFunction F(ctx); - - phi::DenseTensor h(_type); - h.mutable_data({layer_h}, place); - phi::DenseTensor w(_type); - w.mutable_data({layer_w}, place); - F.Arange(layer_h, &h); - F.Arange(layer_w, &w); - h.Resize({layer_h, 1, 1, 1}); - w.Resize({1, layer_w, 1, 1}); - - step_w = step_w > 0 ? step_w : static_cast(image_w) / layer_w; - step_h = step_h > 0 ? step_h : static_cast(image_h) / layer_h; - int step_average = static_cast((step_w + step_h) * 0.5); - - int ratios_size = fixed_ratios.size(); - int num_priors_per_ratio = 0; - for (size_t i = 0; i < densities.size(); ++i) { - num_priors_per_ratio += densities[i] * densities[i]; - } - phi::DenseTensor di(_type); - phi::DenseTensor dj(_type); - phi::DenseTensor shifts(_type); - phi::DenseTensor box_w_ratio(_type); - phi::DenseTensor box_h_ratio(_type); - di.mutable_data({ratios_size * num_priors_per_ratio}, place); - dj.mutable_data({ratios_size * num_priors_per_ratio}, place); - shifts.mutable_data({ratios_size * num_priors_per_ratio}, place); - box_w_ratio.mutable_data({ratios_size * num_priors_per_ratio}, place); - box_h_ratio.mutable_data({ratios_size * num_priors_per_ratio}, place); - - int64_t start = 0; - std::vector vec_tile = {0, 0, 0}; - for (size_t i = 0; i < densities.size(); ++i) { - // Range = start:start+ratios_size*density_sqr, density = densities[i] - int density_sqr = densities[i] * densities[i]; - // shifts[Range] = [step_average/density]*ratios_size*density_sqr - phi::DenseTensor shifts_part = - shifts.Slice(start, start + ratios_size * density_sqr); - FillNpuTensorWithConstant(&shifts_part, - static_cast(step_average / densities[i])); - - // di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size - // dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size - phi::DenseTensor di_part = - di.Slice(start, start + ratios_size * density_sqr); - phi::DenseTensor dj_part = - dj.Slice(start, start + ratios_size * density_sqr); - if (densities[i] > 1) { - di_part.Resize({ratios_size, densities[i], densities[i]}); - dj_part.Resize({ratios_size, densities[i], densities[i]}); - phi::DenseTensor range_n(_type); - range_n.mutable_data({densities[i]}, place); - F.Arange(densities[i], &range_n); - range_n.Resize({1, densities[i], 1}); - vec_tile[0] = ratios_size; - vec_tile[1] = 1; - vec_tile[2] = densities[i]; - F.Tile(&range_n, &di_part, vec_tile); - range_n.Resize({1, 1, densities[i]}); - vec_tile[1] = densities[i]; - vec_tile[2] = 1; - F.Tile(&range_n, &dj_part, vec_tile); - } else { - FillNpuTensorWithConstant(&di_part, static_cast(0)); - FillNpuTensorWithConstant(&dj_part, static_cast(0)); - } - - int start_box_ratio = start; - for (float ar : fixed_ratios) { - // Range_mini = start_box_ratio:start_box_ratio+density_sqr - // box_h_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr - // box_w_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr - phi::DenseTensor box_h_ratio_part = - box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr); - phi::DenseTensor box_w_ratio_part = - box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr); - FillNpuTensorWithConstant(&box_w_ratio_part, - static_cast(fixed_sizes[i] * sqrt(ar))); - FillNpuTensorWithConstant(&box_h_ratio_part, - static_cast(fixed_sizes[i] / sqrt(ar))); - start_box_ratio += density_sqr; - } - start = start_box_ratio; - } - di.Resize({1, 1, ratios_size * num_priors_per_ratio, 1}); - dj.Resize({1, 1, ratios_size * num_priors_per_ratio, 1}); - shifts.Resize({1, 1, ratios_size * num_priors_per_ratio, 1}); - box_w_ratio.Resize({1, 1, ratios_size * num_priors_per_ratio, 1}); - box_h_ratio.Resize({1, 1, ratios_size * num_priors_per_ratio, 1}); - - // c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts - // c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts - phi::DenseTensor c_x(_type); - phi::DenseTensor c_y(_type); - auto dim0 = - phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1}); - auto dim1 = - phi::make_ddim({layer_h, 1, ratios_size * num_priors_per_ratio, 1}); - c_x.mutable_data(dim0, place); - c_y.mutable_data(dim1, place); - F.Adds(&w, offset, &w); - F.Muls(&w, step_w, &w); - F.Adds(&w, static_cast(-step_average) * static_cast(0.5), &w); - F.Adds(&h, offset, &h); - F.Muls(&h, step_h, &h); - F.Adds(&h, static_cast(-step_average) * static_cast(0.5), &h); - F.Mul(&di, &shifts, &di); - F.Mul(&dj, &shifts, &dj); - F.Muls(&shifts, static_cast(0.5), &shifts); - F.Add(&di, &shifts, &di); - F.Add(&dj, &shifts, &dj); - F.Add(&dj, &w, &c_x); - F.Add(&di, &h, &c_y); - - // box_w_ratio = box_w_ratio / 2 - // box_h_ratio = box_h_ratio / 2 - F.Muls(&box_w_ratio, static_cast(0.5), &box_w_ratio); - F.Muls(&box_h_ratio, static_cast(0.5), &box_h_ratio); - - phi::DenseTensor zero_t(_type); - phi::DenseTensor one_t(_type); - zero_t.mutable_data({1}, place); - one_t.mutable_data({1}, place); - FillNpuTensorWithConstant(&zero_t, static_cast(0)); - FillNpuTensorWithConstant(&one_t, static_cast(1)); - - phi::DenseTensor outbox0(_type); - phi::DenseTensor outbox1(_type); - phi::DenseTensor outbox2(_type); - phi::DenseTensor outbox3(_type); - outbox0.mutable_data(dim0, place); - outbox1.mutable_data(dim1, place); - outbox2.mutable_data(dim0, place); - outbox3.mutable_data(dim1, place); - - // outbox0 = max ( (c_x - box_w_ratio)/image_w, 0 ) - // outbox1 = max ( (c_y - box_h_ratio)/image_h, 0 ) - // outbox2 = min ( (c_x + box_w_ratio)/image_w, 1 ) - // outbox3 = min ( (c_y + box_h_ratio)/image_h, 1 ) - F.Sub(&c_x, &box_w_ratio, &outbox0); - F.Sub(&c_y, &box_h_ratio, &outbox1); - F.Add(&c_x, &box_w_ratio, &outbox2); - F.Add(&c_y, &box_h_ratio, &outbox3); - F.Muls(&outbox0, static_cast(1.0 / image_w), &outbox0); - F.Muls(&outbox1, static_cast(1.0 / image_h), &outbox1); - F.Muls(&outbox2, static_cast(1.0 / image_w), &outbox2); - F.Muls(&outbox3, static_cast(1.0 / image_h), &outbox3); - - F.Maximum(&outbox0, &zero_t, &outbox0); - F.Maximum(&outbox1, &zero_t, &outbox1); - F.Minimum(&outbox2, &one_t, &outbox2); - F.Minimum(&outbox3, &one_t, &outbox3); - if (clip) { - // outbox0 = min ( outbox0, 1 ) - // outbox1 = min ( outbox1, 1 ) - // outbox2 = max ( outbox2, 0 ) - // outbox3 = max ( outbox3, 0 ) - F.Minimum(&outbox0, &one_t, &outbox0); - F.Minimum(&outbox1, &one_t, &outbox1); - F.Maximum(&outbox2, &zero_t, &outbox2); - F.Maximum(&outbox3, &zero_t, &outbox3); - } - - auto out_dim = phi::make_ddim( - {layer_h, layer_w, ratios_size * num_priors_per_ratio, 4}); - boxes->mutable_data(place); - vars->mutable_data(place); - phi::DenseTensor boxes_share(_type); - phi::DenseTensor vars_share(_type); - boxes_share.ShareDataWith(*boxes); - boxes_share.Resize(out_dim); - vars_share.ShareDataWith(*vars); - vars_share.Resize(out_dim); - - phi::DenseTensor box0(_type); - phi::DenseTensor box1(_type); - phi::DenseTensor box2(_type); - phi::DenseTensor box3(_type); - // out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1} - out_dim[3] = 1; - box0.mutable_data(out_dim, place); - box1.mutable_data(out_dim, place); - box2.mutable_data(out_dim, place); - box3.mutable_data(out_dim, place); - - std::vector vec_exp_out02 = {layer_h, 1, 1, 1}; - std::vector vec_exp_out13 = {1, layer_w, 1, 1}; - F.Tile(&outbox0, &box0, vec_exp_out02); - F.Tile(&outbox1, &box1, vec_exp_out13); - F.Tile(&outbox2, &box2, vec_exp_out02); - F.Tile(&outbox3, &box3, vec_exp_out13); - F.Concat({box0, box1, box2, box3}, 3, &boxes_share); - - std::vector multiples = { - layer_h, layer_w, ratios_size * num_priors_per_ratio, 1}; - phi::DenseTensor variances_t(_type); - // variances.size() == 4 - variances_t.mutable_data({4}, place); - F.FloatVec2Tsr(variances, &variances_t); - F.Tile(&variances_t, &vars_share, multiples); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_NPU_KERNEL(density_prior_box, - ops::DensityPriorBoxOpNPUKernel, - ops::DensityPriorBoxOpNPUKernel); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc deleted file mode 100644 index 8395e25d46251..0000000000000 --- a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc +++ /dev/null @@ -1,204 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/iou_similarity_op.h" - -namespace paddle { -namespace operators { - -template -struct IouFunction { - public: - explicit IouFunction(const framework::ExecutionContext& ctx) : ctx(ctx) { - place = ctx.GetPlace(); - stream = ctx.template device_context() - .stream(); - } - void Transpose(const phi::DenseTensor* x, - phi::DenseTensor* y, - const std::vector& axis) { - // y should be init first - const auto& runner = - NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}}); - runner.Run(stream); - } - void Add(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Sub(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Mul(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void DivNoNan(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // y should be init first - const auto& runner = NpuOpRunner("DivNoNan", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { - // y should be init first - const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); - runner.Run(stream); - } - void Maximum(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // z should be init first - const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - void Minimum(const phi::DenseTensor* x, - const phi::DenseTensor* y, - phi::DenseTensor* z) { - // z should be init first - const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); - runner.Run(stream); - } - - private: - platform::Place place; - aclrtStream stream; - const framework::ExecutionContext& ctx; -}; - -template -class IouSimilarityNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - bool normalized = ctx.Attr("box_normalized"); - auto* out = ctx.Output("Out"); - - auto _type = x->dtype(); - auto place = ctx.GetPlace(); - - IouFunction F(ctx); - - auto N = x->dims()[0]; - auto M = y->dims()[0]; - - out->mutable_data({N, M}, place); - phi::DenseTensor xt(_type); - phi::DenseTensor yt(_type); - xt.mutable_data({4, N}, place); - yt.mutable_data({4, M}, place); - std::vector vec_trans = {1, 0}; - F.Transpose(x, &xt, vec_trans); - F.Transpose(y, &yt, vec_trans); - phi::DenseTensor xmin1 = xt.Slice(0, 1); - phi::DenseTensor ymin1 = xt.Slice(1, 2); - phi::DenseTensor xmax1 = xt.Slice(2, 3); - phi::DenseTensor ymax1 = xt.Slice(3, 4); - phi::DenseTensor xmin2 = yt.Slice(0, 1); - phi::DenseTensor ymin2 = yt.Slice(1, 2); - phi::DenseTensor xmax2 = yt.Slice(2, 3); - phi::DenseTensor ymax2 = yt.Slice(3, 4); - xmin1.Resize({N, 1}); - ymin1.Resize({N, 1}); - xmax1.Resize({N, 1}); - ymax1.Resize({N, 1}); - xmin2.Resize({1, M}); - ymin2.Resize({1, M}); - xmax2.Resize({1, M}); - ymax2.Resize({1, M}); - - phi::DenseTensor w1(_type); - phi::DenseTensor h1(_type); - phi::DenseTensor w2(_type); - phi::DenseTensor h2(_type); - phi::DenseTensor area1(_type); - phi::DenseTensor area2(_type); - w1.mutable_data({N, 1}, place); - h1.mutable_data({N, 1}, place); - w2.mutable_data({1, M}, place); - h2.mutable_data({1, M}, place); - area1.mutable_data({N, 1}, place); - area2.mutable_data({1, M}, place); - F.Sub(&xmax1, &xmin1, &w1); - F.Sub(&ymax1, &ymin1, &h1); - F.Sub(&xmax2, &xmin2, &w2); - F.Sub(&ymax2, &ymin2, &h2); - if (!normalized) { - F.Adds(&w1, 1.0f, &w1); - F.Adds(&h1, 1.0f, &h1); - F.Adds(&w2, 1.0f, &w2); - F.Adds(&h2, 1.0f, &h2); - } - F.Mul(&w1, &h1, &area1); - F.Mul(&w2, &h2, &area2); - - phi::DenseTensor inter_xmax(_type); - phi::DenseTensor inter_ymax(_type); - phi::DenseTensor inter_xmin(_type); - phi::DenseTensor inter_ymin(_type); - inter_xmax.mutable_data({N, M}, place); - inter_ymax.mutable_data({N, M}, place); - inter_xmin.mutable_data({N, M}, place); - inter_ymin.mutable_data({N, M}, place); - F.Minimum(&xmax1, &xmax2, &inter_xmax); - F.Minimum(&ymax1, &ymax2, &inter_ymax); - F.Maximum(&xmin1, &xmin2, &inter_xmin); - F.Maximum(&ymin1, &ymin2, &inter_ymin); - - phi::DenseTensor inter_w(_type); - phi::DenseTensor inter_h(_type); - inter_w.mutable_data({N, M}, place); - inter_h.mutable_data({N, M}, place); - F.Sub(&inter_xmax, &inter_xmin, &inter_w); - F.Sub(&inter_ymax, &inter_ymin, &inter_h); - - if (!normalized) { - F.Adds(&inter_w, 1.0f, &inter_w); - F.Adds(&inter_h, 1.0f, &inter_h); - } - phi::DenseTensor zeros(_type); - zeros.mutable_data({1}, place); - FillNpuTensorWithConstant(&zeros, static_cast(0)); - F.Maximum(&inter_w, &zeros, &inter_w); - F.Maximum(&inter_h, &zeros, &inter_h); - - F.Mul(&inter_w, &inter_h, out); - phi::DenseTensor union_area(_type); - union_area.mutable_data({N, M}, place); - F.Add(&area1, &area2, &union_area); - F.Sub(&union_area, out, &union_area); - F.DivNoNan(out, &union_area, out); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_NPU_KERNEL(iou_similarity, - ops::IouSimilarityNPUKernel, - ops::IouSimilarityNPUKernel); diff --git a/paddle/fluid/operators/detection/prior_box_op_npu.cc b/paddle/fluid/operators/detection/prior_box_op_npu.cc deleted file mode 100644 index 7df68d2bbb1bb..0000000000000 --- a/paddle/fluid/operators/detection/prior_box_op_npu.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/prior_box_op.h" - -namespace paddle { -namespace operators { - -template -class PriorBoxNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* variances = ctx.Output("Variances"); - - PADDLE_ENFORCE_EQ(boxes->dims(), - variances->dims(), - platform::errors::Unimplemented( - "the shape of boxes and variances must be same in " - "the npu kernel of prior_box, but got boxes->dims() " - "= [%s], variances->dims() = [%s]", - boxes->dims(), - variances->dims())); - - auto min_sizes = ctx.Attr>("min_sizes"); - auto max_sizes = ctx.Attr>("max_sizes"); - auto aspect_ratios = ctx.Attr>("aspect_ratios"); - auto variances_attr = ctx.Attr>("variances"); - bool flip = ctx.Attr("flip"); - bool clip = ctx.Attr("clip"); - float step_w = ctx.Attr("step_w"); - float step_h = ctx.Attr("step_h"); - float offset = ctx.Attr("offset"); - - auto place = ctx.GetPlace(); - - phi::DenseTensor out(input->type()); - auto out_dims = phi::vectorize(boxes->dims()); - out_dims.insert(out_dims.begin(), 2); - out.Resize(phi::make_ddim(out_dims)); - out.mutable_data(place); - - framework::NPUAttributeMap attr_input = {{"min_size", min_sizes}, - {"max_size", max_sizes}, - {"aspect_ratio", aspect_ratios}, - {"step_h", step_h}, - {"step_w", step_w}, - {"flip", flip}, - {"clip", clip}, - {"offset", offset}, - {"variance", variances_attr}}; - - auto stream = - ctx.template device_context() - .stream(); - - const auto& runner = - NpuOpRunner("PriorBox", {*input, *image}, {out}, attr_input); - runner.Run(stream); - - out.Resize(phi::make_ddim({out.numel()})); - phi::DenseTensor out_boxes = out.Slice(0, boxes->numel()); - phi::DenseTensor out_variances = out.Slice(boxes->numel(), out.numel()); - - out_boxes.Resize(boxes->dims()); - out_variances.Resize(variances->dims()); - - boxes->mutable_data(place); - variances->mutable_data(place); - - framework::TensorCopy( - out_boxes, - place, - ctx.template device_context(), - boxes); - framework::TensorCopy( - out_variances, - place, - ctx.template device_context(), - variances); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_NPU_KERNEL( - prior_box, - ops::PriorBoxNPUKernel, - ops::PriorBoxNPUKernel);