delete relu6_raw (PaddlePaddle#55383)
* delete relu6_raw

* fix codestyle

* Update test_mkldnn_matmul_activation_fuse_pass.py

* fix

* Update backward.yaml

* Update ops.yaml

* Update backward.yaml
zhangyuqin1998 authored and cqulilujia committed Jul 24, 2023
1 parent 5545d56 commit 61ab1ae
Showing 24 changed files with 71 additions and 143 deletions.
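In short, this commit removes the separate relu6_raw kernels (which exposed a configurable threshold attribute) and keeps a single relu6 op whose clamp value is fixed at 6.0 across the CPU, GPU, XPU, oneDNN, and sparse code paths. As a reference for the intended semantics, a minimal NumPy sketch (not part of the diff):

    import numpy as np

    def relu6_reference(x):
        # relu6 clamps its input to the range [0, 6]; the threshold is now always 6.0
        return np.minimum(np.maximum(x, 0.0), 6.0)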
11 changes: 11 additions & 0 deletions paddle/phi/api/yaml/backward.yaml
@@ -1740,6 +1740,17 @@
func : reciprocal_grad
inplace : (out_grad -> x_grad)

- backward_op : relu6_grad
forward : relu6 (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu6_grad
inplace : (out_grad -> x_grad)

- backward_op : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad)
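The relu6_grad entry above computes the gradient from the forward output rather than the input. For reference only (not part of the diff), a hedged NumPy sketch of the backward rule the relu6_grad kernels implement:

    import numpy as np

    def relu6_grad_reference(out, out_grad):
        # gradient flows only where the forward output lies strictly inside (0, 6)
        return out_grad * ((out > 0) & (out < 6)).astype(out_grad.dtype)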
11 changes: 0 additions & 11 deletions paddle/phi/api/yaml/legacy_backward.yaml
@@ -485,17 +485,6 @@
func : prod_grad
composite: prod_grad(x, out, out_grad, dims, keep_dim, reduce_all, x_grad)

- backward_op : relu6_grad
forward : relu6 (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu6_grad
inplace : (out_grad -> x_grad)

- backward_op : repeat_interleave_grad
forward : repeat_interleave(Tensor x, int repeats, int axis) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int repeats, int axis)
10 changes: 0 additions & 10 deletions paddle/phi/api/yaml/legacy_ops.yaml
@@ -720,16 +720,6 @@
data_type : dtype
backend : place

- op : relu6
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : relu6
backward : relu6_grad

- op : remainder
args : (Tensor x, Tensor y)
output : Tensor (out)
2 changes: 1 addition & 1 deletion paddle/phi/api/yaml/op_compat.yaml
@@ -2251,7 +2251,7 @@
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false]
attrs : [bool use_mkldnn = false, float threshold = 6.0]

- op : remainder (elementwise_mod)
inputs :
10 changes: 10 additions & 0 deletions paddle/phi/api/yaml/ops.yaml
@@ -1996,6 +1996,16 @@
inplace : (x -> out)
backward : relu_grad

- op : relu6
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : relu6
backward : relu6_grad

- op : renorm
args : (Tensor x, float p, int axis, float max_norm)
output : Tensor
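With relu6 now declared in ops.yaml without a threshold argument, the op takes only the input tensor. A hedged usage sketch, assuming paddle.nn.functional.relu6 is the corresponding Python entry point:

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.0, 3.0, 8.0])
    y = F.relu6(x)  # expected: [0., 3., 6.], since the cap is fixed at 6.0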
11 changes: 0 additions & 11 deletions paddle/phi/api/yaml/static_backward.yaml
@@ -278,17 +278,6 @@
func : prod_grad
composite: prod_grad(x, out, out_grad, dims, keep_dim, reduce_all, x_grad)

- backward_op : relu6_grad
forward : relu6 (Tensor x, float threshold = 6.0f) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu6_grad
inplace : (out_grad -> x_grad)

- backward_op : rnn_grad
forward : rnn (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, float dropout_prob=0.0, bool is_bidirec=false, int input_size=10, int hidden_size=100, int num_layers=1, str mode="RNN_TANH", int seed=0, bool is_test=false) -> Tensor(out), Tensor(dropout_state_out), Tensor[](state), Tensor(reserve)
args : (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, Tensor out, Tensor dropout_state_out, Tensor reserve, Tensor out_grad, Tensor[] state_grad, float dropout_prob, bool is_bidirec, int input_size, int hidden_size, int num_layers, str mode, int seed, bool is_test)
10 changes: 0 additions & 10 deletions paddle/phi/api/yaml/static_ops.yaml
@@ -544,16 +544,6 @@
func : reduce_scatter
param: [x, nranks]

- op : relu6
args : (Tensor x, float threshold = 6.0f)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : relu6_raw
backward : relu6_grad

- op : remainder
args : (Tensor x, Tensor y, int axis = -1)
output : Tensor (out)
54 changes: 0 additions & 54 deletions paddle/phi/kernels/activation_kernel.cc

This file was deleted.

1 change: 0 additions & 1 deletion paddle/phi/kernels/activation_kernel.h
@@ -75,7 +75,6 @@ DECLARE_ACTIVATION_KERNEL(Negative)

DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6Raw, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
14 changes: 12 additions & 2 deletions paddle/phi/kernels/cpu/activation_kernel.cc
@@ -109,7 +109,6 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor,
threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6Raw, Relu6Functor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
@@ -150,6 +149,17 @@ void SwishKernel(const Context& dev_ctx,
ActivationImpl<T, T, Context, funcs::SwishFunctor<T>>(
dev_ctx, x, out, functor);
}

template <typename T, typename Context>
void Relu6Kernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
funcs::Relu6Functor<T> functor;
auto attrs = functor.GetAttrs();
*(attrs[0].second) = 6.0;
ActivationImpl<T, T, Context, funcs::Relu6Functor<T>>(
dev_ctx, x, out, functor);
}
} // namespace phi
PD_REGISTER_KERNEL(relu, CPU, ALL_LAYOUT, phi::ReluKernel, float, double) {}

@@ -171,7 +181,6 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(hardtanh, HardTanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
@@ -212,6 +221,7 @@ PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)

PD_REGISTER_KERNEL(log,
CPU,
14 changes: 12 additions & 2 deletions paddle/phi/kernels/gpu/activation_kernel.cu
@@ -126,7 +126,6 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LogitCUDA, CudaLogitFunctor, eps)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
CudaThresholdedReluFunctor,
threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6Raw, CudaRelu6Functor, threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
CudaHardShrinkFunctor,
threshold)
@@ -176,6 +175,17 @@ void SwishKernel(const Context& dev_ctx,
ActivationGPUImpl<T, Context, funcs::CudaSwishFunctor<T>>(
dev_ctx, x, out, functor);
}

template <typename T, typename Context>
void Relu6Kernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
funcs::CudaRelu6Functor<T> functor;
auto attrs = functor.GetAttrs();
*(attrs[0].second) = 6.0;
ActivationGPUImpl<T, Context, funcs::CudaRelu6Functor<T>>(
dev_ctx, x, out, functor);
}
} // namespace phi

#ifdef PADDLE_WITH_HIP
@@ -221,7 +231,7 @@ PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(hardtanh, HardTanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel)
11 changes: 5 additions & 6 deletions paddle/phi/kernels/onednn/activation_kernel.cc
@@ -178,12 +178,11 @@ void GeluKernel(const Context& dev_ctx,
}

template <typename T, typename Context>
void Relu6RawKernel(const Context& dev_ctx,
const DenseTensor& x,
float threshold,
DenseTensor* out) {
void Relu6Kernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
Relu6OneDNNFunctor<T> functor;
functor(dev_ctx, x, 0, threshold, out);
functor(dev_ctx, x, 0, 6.0, out);
}

template <typename T, typename Context>
@@ -210,7 +209,7 @@ PD_REGISTER_ACTIVATION_KERNEL(hardswish, HardSwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu, ReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
1 change: 0 additions & 1 deletion paddle/phi/kernels/sparse/cpu/unary_kernel.cc
@@ -95,7 +95,6 @@ PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(pow, Pow)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(scale, Scale)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(expm1, Expm1)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(relu6, Relu6)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(relu6_raw, Relu6Raw)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(leaky_relu, LeakyRelu)

PD_REGISTER_KERNEL(divide_scalar_coo,
1 change: 0 additions & 1 deletion paddle/phi/kernels/sparse/gpu/unary_kernel.cu
@@ -99,7 +99,6 @@ PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(abs, Abs)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(pow, Pow)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(scale, Scale)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(expm1, Expm1)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(relu6_raw, Relu6Raw)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(relu6, Relu6)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(leaky_relu, LeakyRelu)

16 changes: 1 addition & 15 deletions paddle/phi/kernels/sparse/impl/unary_kernel_impl.h
@@ -89,24 +89,10 @@ DEFINE_SPARSE_UNARY_KERNEL(Log1p)
DEFINE_SPARSE_UNARY_KERNEL(Relu)
DEFINE_SPARSE_UNARY_KERNEL(Abs)
DEFINE_SPARSE_UNARY_KERNEL(Expm1)
DEFINE_SPARSE_UNARY_KERNEL(Relu6)
DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Pow, factor)
DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Relu6Raw, threshold)
DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(LeakyRelu, alpha)

template <typename T, typename Context>
void Relu6CooKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
Relu6RawCooKernel<T, Context>(dev_ctx, x, 6, out);
}

template <typename T, typename Context>
void Relu6CsrKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCsrTensor* out) {
Relu6RawCsrKernel<T, Context>(dev_ctx, x, 6, out);
}

template <typename T, typename Context>
void ScaleCooKernel(const Context& dev_ctx,
const SparseCooTensor& x,
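With the Relu6Raw wrappers removed above, the sparse relu6 kernels are plain unary kernels with the same fixed cap. A hedged usage sketch, assuming paddle.sparse.nn.functional.relu6 is the corresponding sparse entry point:

    import paddle

    indices = [[0, 1], [0, 1]]
    values = [7.0, -2.0]
    sp_x = paddle.sparse.sparse_coo_tensor(indices, values, shape=[2, 2])
    sp_y = paddle.sparse.nn.functional.relu6(sp_x)  # stored values clamped to [0, 6]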
1 change: 0 additions & 1 deletion paddle/phi/kernels/sparse/unary_kernel.h
@@ -60,7 +60,6 @@ DECLARE_SPARSE_UNARY_KERNEL(Log1p)
DECLARE_SPARSE_UNARY_KERNEL(Abs)
DECLARE_SPARSE_UNARY_KERNEL(Expm1)
DECLARE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Pow, factor)
DECLARE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Relu6Raw, threshold)
DECLARE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(LeakyRelu, alpha)

template <typename T, typename Context>
22 changes: 12 additions & 10 deletions paddle/phi/kernels/xpu/activation_kernel.cc
@@ -415,6 +415,16 @@ void SwishKernel(const Context& dev_ctx,
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
}

template <typename T, typename Context>
void Relu6Kernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
XPURelu6Functor<T> functor;
auto attrs = functor.GetAttrs();
*(attrs[0].second) = 6.0;
ActivationXPUImpl<T, Context, XPURelu6Functor<T>>(dev_ctx, x, out, functor);
}

template <typename T>
struct XPUSoftplusFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
@@ -504,10 +514,6 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu,
XPULeakyReluFunctor,
alpha)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6Raw,
XPURelu6Functor,
threshold)

DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
XPUSoftplusFunctor,
beta,
@@ -567,12 +573,8 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL(
log, XPU, ALL_LAYOUT, phi::LogKernel, float, phi::dtype::float16) {}

PD_REGISTER_KERNEL(relu6_raw,
XPU,
ALL_LAYOUT,
phi::Relu6RawKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(
relu6, XPU, ALL_LAYOUT, phi::Relu6Kernel, float, phi::dtype::float16) {}

#define PD_REGISTER_ACTIVATION_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {}
2 changes: 1 addition & 1 deletion test/ir/inference/test_conv_act_onednn_fuse_pass.py
@@ -166,7 +166,7 @@ def sample_program_config(self, draw):
'relu6',
inputs={'X': ['conv2d_out']},
outputs={'Out': ['relu_out']},
threshold=draw(st.floats(min_value=1.0, max_value=10.0)),
threshold=6.0,
)
elif act_type == 'leaky_relu':
act_op = OpConfig(
@@ -86,7 +86,7 @@ def generate_input(type):
activation_type,
inputs={"X": ["matmul_output"]},
outputs={"Out": ["activation_output"]},
threshold=draw(st.floats(min_value=1.0, max_value=10.0)),
threshold=6,
)
elif activation_type == "leaky_relu":
activation_op = OpConfig(
@@ -81,7 +81,7 @@ def generate_input():
activation_type,
inputs={"X": ["elementwise_add_output"]},
outputs={"Out": ["activation_output"]},
threshold=draw(st.floats(min_value=1.0, max_value=10.0)),
threshold=6.0,
)
elif activation_type == "leaky_relu":
activation_op = OpConfig(