From 6b1c960058fedb477ec407c6e619c5b348d3bd37 Mon Sep 17 00:00:00 2001 From: samukweku Date: Thu, 9 Mar 2023 21:00:48 +1100 Subject: [PATCH 001/124] changes to allow more flexibility for reduction operations --- src/core/column/minmax.h | 2 +- src/core/column/reduce_unary.h | 5 ++-- src/core/column/sumprod.h | 2 +- src/core/expr/fexpr_mean.cc | 43 ++++++++++++++++++++++------------ src/core/expr/fexpr_sumprod.cc | 13 +++++----- 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 9bfdb2905..95fbd0599 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 08bbdf8d0..3eeb39255 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -26,15 +26,16 @@ namespace dt { -template +template class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; Groupby gby_; + public: ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), col.stype()), + : Virtual_ColumnImpl(gby.size(), stype_from), col_(std::move(col)), gby_(gby) { diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index d709a3a6f..10bb3a074 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -31,7 +31,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, T* out) const override { + bool get_element(size_t i, U* out) const override { T result = !SUM; // 0 for `sum()` and 1 for `prod()` T value; size_t i0, i1; diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 716de2182..162417367 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -52,17 +52,27 @@ class FExpr_Mean : public FExpr_ReduceUnary { )); case SType::BOOL: case SType::INT8: + return make(std::move(col), gby, is_grouped); case SType::INT16: - case SType::INT32: + return make(std::move(col), gby, is_grouped); + case SType::INT32: + return make(std::move(col), gby, is_grouped); case SType::INT64: - case SType::DATE32: - case SType::TIME64: + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); - break; + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); - break; + return make(std::move(col), gby, is_grouped); + case SType::DATE32: { + Column coli = make(std::move(col), gby, is_grouped); + coli.cast_inplace(SType::DATE32); + return coli; + } + case SType::TIME64: { + Column coli = make(std::move(col), gby, is_grouped); + coli.cast_inplace(SType::TIME64); + return coli; + } default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -75,14 +85,17 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { - col.cast_inplace(stype); - - return is_grouped? std::move(col) - : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), gby - ))); + template + Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + if (is_grouped) { + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), gby + ))); + } else { + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), gby + ))); + } } }; diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index ab1adc0bc..4daf57b84 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,11 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -68,14 +68,13 @@ class FExpr_SumProd : public FExpr_ReduceUnary { template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { - col.cast_inplace(stype); + Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } From b79dc27556dc7eca3b0dc44fef52c369d14498f4 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 05:07:44 +1100 Subject: [PATCH 002/124] add countna with additional tests --- src/core/column/countna.h | 56 ++++++++++++++ src/core/expr/fexpr_countna.cc | 134 +++++++++++++++++++++++++++++++++ src/datatable/expr/expr.py | 1 - src/datatable/expr/reduce.py | 2 +- tests/dt/test-countna.py | 2 +- tests/types/test-void.py | 4 + 6 files changed, 196 insertions(+), 3 deletions(-) create mode 100644 src/core/column/countna.h create mode 100644 src/core/expr/fexpr_countna.cc diff --git a/src/core/column/countna.h b/src/core/column/countna.h new file mode 100644 index 000000000..d4ac14205 --- /dev/null +++ b/src/core/column/countna.h @@ -0,0 +1,56 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_COUNTNA_h +#define dt_COLUMN_COUNTNA_h +#include "column/reduce_unary.h" +namespace dt { + + +template +class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { + public: + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + + bool get_element(size_t i, U* out) const override { + T value; + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + int64_t count = 0; + + if (IS_GROUPED){ + bool isvalid = this->col_.get_element(i, &value); + count = isvalid? 0: static_cast(i1 - i0); + *out = count; + return true; + } else { + for (size_t gi = i0; gi < i1; ++gi) { + bool isvalid = this->col_.get_element(gi, &value); + count += !isvalid; + } + *out = count; + return true; // *out is not NA + } + } +}; + +} // namespace dt +#endif diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_countna.cc new file mode 100644 index 000000000..6cf4aa493 --- /dev/null +++ b/src/core/expr/fexpr_countna.cc @@ -0,0 +1,134 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#include "column/const.h" +#include "column/latent.h" +#include "column/countna.h" +#include "documentation.h" +#include "expr/fexpr_func.h" +#include "expr/eval_context.h" +#include "expr/workframe.h" +#include "python/xargs.h" +#include "stype.h" +namespace dt { +namespace expr { + + +class FExpr_CountNA : public FExpr_Func { + private: + ptrExpr arg_; + + public: + FExpr_CountNA(ptrExpr &&arg) + : arg_(std::move(arg)) {} + + std::string repr() const override { + std::string out = "countna"; + out += '('; + out += arg_->repr(); + out += ')'; + return out; + } + + + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe outputs(ctx); + Workframe wf = arg_->evaluate_n(ctx); + Groupby gby = ctx.get_groupby(); + + if (!gby) { + gby = Groupby::single_group(wf.nrows()); + } + + for (size_t i = 0; i < wf.ncols(); ++i) { + bool is_grouped = ctx.has_group_column( + wf.get_frame_id(i), + wf.get_column_id(i) + ); + + Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } + + return outputs; + } + + + Column evaluate1(Column &&col, const Groupby& gby, bool is_grouped) const { + SType stype = col.stype(); + + switch (stype) { + case SType::VOID: + case SType::BOOL: + case SType::INT8: + return make(std::move(col), gby, is_grouped); + case SType::INT16: + return make(std::move(col), gby, is_grouped); + case SType::DATE32: + case SType::INT32: + return make(std::move(col), gby, is_grouped); + case SType::TIME64: + case SType::INT64: + return make(std::move(col), gby, is_grouped); + case SType::FLOAT32: + return make(std::move(col), gby, is_grouped); + case SType::FLOAT64: + return make(std::move(col), gby, is_grouped); + case SType::STR32: + case SType::STR64: + return make(std::move(col), gby, is_grouped); + default: + throw TypeError() + << "Invalid column of type `" << stype << "` in " << repr(); + } + } + + + template + Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + if (is_grouped) { + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + std::move(col), gby + ))); + } else { + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + std::move(col), gby + ))); + } + } +}; + + + +static py::oobj pyfn_countna(const py::XArgs &args) { + auto countna = args[0].to_oobj(); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); +} + +DECLARE_PYFN(&pyfn_countna) + ->name("countna") + ->docs(doc_dt_countna) + ->arg_names({"cols"}) + ->n_positional_args(1) + ->n_required_args(1); + + +}} // dt::expr diff --git a/src/datatable/expr/expr.py b/src/datatable/expr/expr.py index a91782d4b..e5a342a68 100644 --- a/src/datatable/expr/expr.py +++ b/src/datatable/expr/expr.py @@ -59,7 +59,6 @@ class OpCodes(enum.Enum): MEDIAN = 410 COV = 411 CORR = 412 - COUNTNA = 413 NUNIQUE = 414 # Math: trigonometric diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a8ee39bfe..06c4d9ffd 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -57,7 +57,7 @@ def nunique(iterable=None): def countna(iterable=None): - return Expr(OpCodes.COUNTNA, (iterable,)) + return core.countna(iterable) def first(iterable): diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index b30b4a8b9..f8660f41d 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -71,6 +71,6 @@ def test_dt_count_na1(src): def test_dt_count_na2(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) - EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[3,0]) + EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) RES = DT[:, [dt.countna(f.V), dt.countna(dt.mean(f.V))], dt.by(f.G)] assert EXP.to_list() == RES.to_list() diff --git a/tests/types/test-void.py b/tests/types/test-void.py index d41845c89..da36e3040 100644 --- a/tests/types/test-void.py +++ b/tests/types/test-void.py @@ -216,6 +216,10 @@ def test_groupby_void_reducer(): DT = dt.Frame([None] * 5)[:, dt.count(), dt.by(0)] assert_equals(DT, dt.Frame(C0=[None], count=[5]/dt.int64)) +def test_groupby_void_countna(): + DT = dt.Frame([None] * 5)[:, dt.countna(f[0]), dt.by(0)] + assert_equals(DT, dt.Frame(C0=[None], C1=[5]/dt.int64)) + def test_groupby_void_twice(): # See issue #3108 From 4222481d891e303dad82a1db716f5e40bdf731f1 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 05:09:27 +1100 Subject: [PATCH 003/124] update countna doc link --- docs/api/dt/countna.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 078b4bb14..ff8570516 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -1,6 +1,6 @@ .. xfunction:: datatable.countna - :src: src/core/expr/head_reduce_unary.cc op_countna + :src: src/core/expr/fexpr_minmax.cc pyfn_countna :tests: tests/test-reduce.py :cvar: doc_dt_countna :signature: countna(cols) From 8cd4106a619738c0a5635e55f25ab95d683afccc Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 21:20:30 +1100 Subject: [PATCH 004/124] add count fexpr --- src/core/column/countna.h | 11 +- src/core/expr/fexpr_countna.cc | 23 +- src/core/expr/head_reduce_unary.cc | 336 ++++++++++++++--------------- src/datatable/expr/reduce.py | 2 +- 4 files changed, 194 insertions(+), 178 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index d4ac14205..a42cc8431 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,7 +25,7 @@ namespace dt { -template +template class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -38,13 +38,18 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); - count = isvalid? 0: static_cast(i1 - i0); + if (COUNT){ + count = isvalid? static_cast(i1 - i0) : 0; + } else { + count = isvalid? 0: static_cast(i1 - i0); + } + *out = count; return true; } else { for (size_t gi = i0; gi < i1; ++gi) { bool isvalid = this->col_.get_element(gi, &value); - count += !isvalid; + count += COUNT? isvalid : !isvalid; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_countna.cc index 6cf4aa493..7f6d48e54 100644 --- a/src/core/expr/fexpr_countna.cc +++ b/src/core/expr/fexpr_countna.cc @@ -31,7 +31,7 @@ namespace dt { namespace expr { - +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -41,7 +41,7 @@ class FExpr_CountNA : public FExpr_Func { : arg_(std::move(arg)) {} std::string repr() const override { - std::string out = "countna"; + std::string out = COUNT? "count" : "countna"; out += '('; out += arg_->repr(); out += ')'; @@ -105,24 +105,35 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } } }; - +static py::oobj pyfn_count(const py::XArgs &args) { + auto count = args[0].to_oobj_or_none(); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); +} static py::oobj pyfn_countna(const py::XArgs &args) { auto countna = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } + +DECLARE_PYFN(&pyfn_count) + ->name("count") + ->docs(doc_dt_count) + ->arg_names({"cols"}) + ->n_positional_args(1); + + DECLARE_PYFN(&pyfn_countna) ->name("countna") ->docs(doc_dt_countna) diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 48cbb6200..6b129f2ae 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -308,170 +308,170 @@ static Column compute_gsd(Column&& arg, const Groupby& gby) { -//------------------------------------------------------------------------------ -// count(A) -//------------------------------------------------------------------------------ - -template -bool count_reducer(const Column& col, size_t i0, size_t i1, int64_t* out) { - int64_t count = 0; - for (size_t i = i0; i < i1; ++i) { - T value; - bool isvalid = col.get_element(i, &value); - count += isvalid; - } - *out = count; - return true; // *out is not NA -} - - -template -static Column _count(Column&& arg, const Groupby& gby) { - return Column( - new Latent_ColumnImpl( - new Reduced_ColumnImpl( - SType::INT64, std::move(arg), gby, count_reducer - ))); -} - - -static Column compute_count(Column&& arg, const Groupby& gby) { - switch (arg.stype()) { - case SType::VOID: return Column(new ConstInt_ColumnImpl( - gby.size(), 0, SType::INT64 - )); - case SType::BOOL: - case SType::INT8: return _count(std::move(arg), gby); - case SType::INT16: return _count(std::move(arg), gby); - case SType::DATE32: - case SType::INT32: return _count(std::move(arg), gby); - case SType::TIME64: - case SType::INT64: return _count(std::move(arg), gby); - case SType::FLOAT32: return _count(std::move(arg), gby); - case SType::FLOAT64: return _count(std::move(arg), gby); - case SType::STR32: - case SType::STR64: return _count(std::move(arg), gby); - default: throw _error("count", arg.stype()); - } -} - - -//------------------------------------------------------------------------------ -// countna -//------------------------------------------------------------------------------ - -template -bool op_countna(const Column& col, size_t i0, size_t i1, int64_t* out) { - int64_t count = 0; - for (size_t i = i0; i < i1; ++i) { - T value; - bool isvalid = col.get_element(i, &value); - count += !isvalid; - } - *out = count; - return true; // *out is not NA -} - - - -template -static Column _countna(Column&& arg, const Groupby& gby) { - return Column( - new Latent_ColumnImpl( - new Reduced_ColumnImpl( - SType::INT64, std::move(arg), gby, op_countna - ))); -} - -static Column compute_countna(Column&& arg, const Groupby& gby) { - switch (arg.stype()) { - case SType::VOID: - case SType::BOOL: - case SType::INT8: return _countna(std::move(arg), gby); - case SType::INT16: return _countna(std::move(arg), gby); - case SType::DATE32: - case SType::INT32: return _countna(std::move(arg), gby); - case SType::TIME64: - case SType::INT64: return _countna(std::move(arg), gby); - case SType::FLOAT32: return _countna(std::move(arg), gby); - case SType::FLOAT64: return _countna(std::move(arg), gby); - case SType::STR32: - case SType::STR64: return _countna(std::move(arg), gby); - default: throw _error("countna", arg.stype()); - } -} - - -//------------------------------------------------------------------------------ -// count/countna(A:grouped) -//------------------------------------------------------------------------------ - -// T is the type of the input column -template -class CountGrouped_ColumnImpl : public Virtual_ColumnImpl -{ - private: - Column arg; - Groupby groupby; - - public: - CountGrouped_ColumnImpl(Column&& col, const Groupby& grpby) - : Virtual_ColumnImpl(grpby.size(), SType::INT64), - arg(std::move(col)), - groupby(grpby) {} - - ColumnImpl* clone() const override { - return new CountGrouped_ColumnImpl(Column(arg), groupby); - } - - bool get_element(size_t i, int64_t* out) const override { - T value; - bool isvalid = arg.get_element(i, &value); - if (isvalid ^ NA) { - size_t i0, i1; - groupby.get_group(i, &i0, &i1); - *out = static_cast(i1 - i0); - } else { - *out = 0; - } - return true; - } - - size_t n_children() const noexcept override { - return 1; - } - - const Column& child(size_t i) const override { - xassert(i == 0); (void)i; - return arg; - } - -}; - - -template -static Column _gcount(Column&& arg, const Groupby& gby) { - return Column(new CountGrouped_ColumnImpl(std::move(arg), gby)); -} - -template -static Column compute_gcount(Column&& arg, const Groupby& gby) { - switch (arg.stype()) { - case SType::VOID: return Column(new ConstInt_ColumnImpl(1, 0, SType::INT64)); - case SType::BOOL: - case SType::INT8: return _gcount(std::move(arg), gby); - case SType::INT16: return _gcount(std::move(arg), gby); - case SType::DATE32: - case SType::INT32: return _gcount(std::move(arg), gby); - case SType::TIME64: - case SType::INT64: return _gcount(std::move(arg), gby); - case SType::FLOAT32: return _gcount(std::move(arg), gby); - case SType::FLOAT64: return _gcount(std::move(arg), gby); - case SType::STR32: - case SType::STR64: return _gcount(std::move(arg), gby); - default: throw _error("count", arg.stype()); - } -} +// //------------------------------------------------------------------------------ +// // count(A) +// //------------------------------------------------------------------------------ + +// template +// bool count_reducer(const Column& col, size_t i0, size_t i1, int64_t* out) { +// int64_t count = 0; +// for (size_t i = i0; i < i1; ++i) { +// T value; +// bool isvalid = col.get_element(i, &value); +// count += isvalid; +// } +// *out = count; +// return true; // *out is not NA +// } + + +// template +// static Column _count(Column&& arg, const Groupby& gby) { +// return Column( +// new Latent_ColumnImpl( +// new Reduced_ColumnImpl( +// SType::INT64, std::move(arg), gby, count_reducer +// ))); +// } + + +// static Column compute_count(Column&& arg, const Groupby& gby) { +// switch (arg.stype()) { +// case SType::VOID: return Column(new ConstInt_ColumnImpl( +// gby.size(), 0, SType::INT64 +// )); +// case SType::BOOL: +// case SType::INT8: return _count(std::move(arg), gby); +// case SType::INT16: return _count(std::move(arg), gby); +// case SType::DATE32: +// case SType::INT32: return _count(std::move(arg), gby); +// case SType::TIME64: +// case SType::INT64: return _count(std::move(arg), gby); +// case SType::FLOAT32: return _count(std::move(arg), gby); +// case SType::FLOAT64: return _count(std::move(arg), gby); +// case SType::STR32: +// case SType::STR64: return _count(std::move(arg), gby); +// default: throw _error("count", arg.stype()); +// } +// } + + +// //------------------------------------------------------------------------------ +// // countna +// //------------------------------------------------------------------------------ + +// template +// bool op_countna(const Column& col, size_t i0, size_t i1, int64_t* out) { +// int64_t count = 0; +// for (size_t i = i0; i < i1; ++i) { +// T value; +// bool isvalid = col.get_element(i, &value); +// count += !isvalid; +// } +// *out = count; +// return true; // *out is not NA +// } + + + +// template +// static Column _countna(Column&& arg, const Groupby& gby) { +// return Column( +// new Latent_ColumnImpl( +// new Reduced_ColumnImpl( +// SType::INT64, std::move(arg), gby, op_countna +// ))); +// } + +// static Column compute_countna(Column&& arg, const Groupby& gby) { +// switch (arg.stype()) { +// case SType::VOID: +// case SType::BOOL: +// case SType::INT8: return _countna(std::move(arg), gby); +// case SType::INT16: return _countna(std::move(arg), gby); +// case SType::DATE32: +// case SType::INT32: return _countna(std::move(arg), gby); +// case SType::TIME64: +// case SType::INT64: return _countna(std::move(arg), gby); +// case SType::FLOAT32: return _countna(std::move(arg), gby); +// case SType::FLOAT64: return _countna(std::move(arg), gby); +// case SType::STR32: +// case SType::STR64: return _countna(std::move(arg), gby); +// default: throw _error("countna", arg.stype()); +// } +// } + + +// //------------------------------------------------------------------------------ +// // count/countna(A:grouped) +// //------------------------------------------------------------------------------ + +// // T is the type of the input column +// template +// class CountGrouped_ColumnImpl : public Virtual_ColumnImpl +// { +// private: +// Column arg; +// Groupby groupby; + +// public: +// CountGrouped_ColumnImpl(Column&& col, const Groupby& grpby) +// : Virtual_ColumnImpl(grpby.size(), SType::INT64), +// arg(std::move(col)), +// groupby(grpby) {} + +// ColumnImpl* clone() const override { +// return new CountGrouped_ColumnImpl(Column(arg), groupby); +// } + +// bool get_element(size_t i, int64_t* out) const override { +// T value; +// bool isvalid = arg.get_element(i, &value); +// if (isvalid ^ NA) { +// size_t i0, i1; +// groupby.get_group(i, &i0, &i1); +// *out = static_cast(i1 - i0); +// } else { +// *out = 0; +// } +// return true; +// } + +// size_t n_children() const noexcept override { +// return 1; +// } + +// const Column& child(size_t i) const override { +// xassert(i == 0); (void)i; +// return arg; +// } + +// }; + + +// template +// static Column _gcount(Column&& arg, const Groupby& gby) { +// return Column(new CountGrouped_ColumnImpl(std::move(arg), gby)); +// } + +// template +// static Column compute_gcount(Column&& arg, const Groupby& gby) { +// switch (arg.stype()) { +// case SType::VOID: return Column(new ConstInt_ColumnImpl(1, 0, SType::INT64)); +// case SType::BOOL: +// case SType::INT8: return _gcount(std::move(arg), gby); +// case SType::INT16: return _gcount(std::move(arg), gby); +// case SType::DATE32: +// case SType::INT32: return _gcount(std::move(arg), gby); +// case SType::TIME64: +// case SType::INT64: return _gcount(std::move(arg), gby); +// case SType::FLOAT32: return _gcount(std::move(arg), gby); +// case SType::FLOAT64: return _gcount(std::move(arg), gby); +// case SType::STR32: +// case SType::STR64: return _gcount(std::move(arg), gby); +// default: throw _error("count", arg.stype()); +// } +// } @@ -713,8 +713,8 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_sd; break; case Op::FIRST: fn = compute_firstlast; break; case Op::LAST: fn = compute_firstlast; break; - case Op::COUNT: fn = compute_count; break; - case Op::COUNTNA:fn = compute_countna; break; + //case Op::COUNT: fn = compute_count; break; + //case Op::COUNTNA:fn = compute_countna; break; case Op::MEDIAN: fn = compute_median; break; case Op::NUNIQUE:fn = compute_nunique; break; default: throw TypeError() << "Unknown reducer function: " @@ -725,8 +725,8 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_gsd; break; case Op::FIRST: case Op::LAST: fn = compute_gfirstlast; break; - case Op::COUNT: fn = compute_gcount; break; - case Op::COUNTNA:fn = compute_gcount; break; + //case Op::COUNT: fn = compute_gcount; break; + //case Op::COUNTNA:fn = compute_gcount; break; case Op::MEDIAN: fn = compute_gmedian; break; case Op::NUNIQUE:fn = compute_gnunique; break; default: throw TypeError() << "Unknown reducer function: " diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 06c4d9ffd..f692d6c81 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -45,7 +45,7 @@ def count(iterable=None): if isinstance(iterable, (Expr, core.FExpr)): - return Expr(OpCodes.COUNT, (iterable,)) + return core.count(iterable) elif iterable is None: return Expr(OpCodes.COUNT0, ()) else: From 1be2f5cffd57dc175ade1bc704708fe999f44a81 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 22:02:25 +1100 Subject: [PATCH 005/124] add count for all rows --- src/core/column/countna.h | 8 ++++++-- src/core/expr/fexpr_countna.cc | 23 +++++++++++++++-------- src/datatable/expr/reduce.py | 3 ++- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index a42cc8431..8b506875f 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,7 +25,7 @@ namespace dt { -template +template class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -36,7 +36,11 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { this->gby_.get_group(i, &i0, &i1); int64_t count = 0; - if (IS_GROUPED){ + if (COUNTT) { + *out = static_cast(i1 - i0); + return true; + } + else if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); if (COUNT){ count = isvalid? static_cast(i1 - i0) : 0; diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_countna.cc index 7f6d48e54..ccac5710a 100644 --- a/src/core/expr/fexpr_countna.cc +++ b/src/core/expr/fexpr_countna.cc @@ -31,7 +31,7 @@ namespace dt { namespace expr { -template +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -43,7 +43,7 @@ class FExpr_CountNA : public FExpr_Func { std::string repr() const override { std::string out = COUNT? "count" : "countna"; out += '('; - out += arg_->repr(); + if (!COUNTT) out += arg_->repr(); out += ')'; return out; } @@ -64,8 +64,12 @@ class FExpr_CountNA : public FExpr_Func { wf.get_column_id(i) ); - Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); + if (COUNTT) { + outputs.add_column(std::move(coli), "count", Grouping::GtoONE); + } else { + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } } return outputs; @@ -105,11 +109,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } @@ -118,12 +122,15 @@ class FExpr_CountNA : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { auto count = args[0].to_oobj_or_none(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + if (count.is_none()) { + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + } + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); } static py::oobj pyfn_countna(const py::XArgs &args) { auto countna = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index f692d6c81..72f219a7f 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -47,7 +47,8 @@ def count(iterable=None): if isinstance(iterable, (Expr, core.FExpr)): return core.count(iterable) elif iterable is None: - return Expr(OpCodes.COUNT0, ()) + return core.count(iterable) + #return Expr(OpCodes.COUNT0, ()) else: return _builtin_sum((x is not None) for x in iterable) From 4ca61ac1266b92262cafe4d01a6b27a7f0940c51 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 22:09:04 +1100 Subject: [PATCH 006/124] simplify logic choice --- src/datatable/expr/reduce.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 72f219a7f..e094654a6 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -44,11 +44,8 @@ def count(iterable=None): - if isinstance(iterable, (Expr, core.FExpr)): - return core.count(iterable) - elif iterable is None: + if isinstance(iterable, (Expr, core.FExpr)) or (iterable is None): return core.count(iterable) - #return Expr(OpCodes.COUNT0, ()) else: return _builtin_sum((x is not None) for x in iterable) From 518b12d01a7332cb84413424caa757f7b3f86699 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 22:27:22 +1100 Subject: [PATCH 007/124] update docs links --- docs/api/dt/count.rst | 2 +- docs/api/dt/countna.rst | 2 +- src/core/column/countna.h | 3 +-- src/core/expr/{fexpr_countna.cc => fexpr_count_countna.cc} | 0 4 files changed, 3 insertions(+), 4 deletions(-) rename src/core/expr/{fexpr_countna.cc => fexpr_count_countna.cc} (100%) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index a21e38440..951a25bfb 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -1,6 +1,6 @@ .. xfunction:: datatable.count - :src: src/core/expr/head_reduce_unary.cc count_reducer + :src: src/core/expr/fexpr_count_countna.cc pyfn_count :cvar: doc_dt_count :tests: tests/test-reduce.py :signature: count(cols) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index ff8570516..fafbc0f1e 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -1,6 +1,6 @@ .. xfunction:: datatable.countna - :src: src/core/expr/fexpr_minmax.cc pyfn_countna + :src: src/core/expr/fexpr_count_countna.cc pyfn_countna :tests: tests/test-reduce.py :cvar: doc_dt_countna :signature: countna(cols) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 8b506875f..21d53b65e 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -46,8 +46,7 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { count = isvalid? static_cast(i1 - i0) : 0; } else { count = isvalid? 0: static_cast(i1 - i0); - } - + } *out = count; return true; } else { diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_count_countna.cc similarity index 100% rename from src/core/expr/fexpr_countna.cc rename to src/core/expr/fexpr_count_countna.cc From c8311f3daf92181cbebf828516673f839c08d8e9 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 11 Mar 2023 11:06:02 +1100 Subject: [PATCH 008/124] update code based on feedback --- src/core/column/countna.h | 10 +- src/core/expr/fexpr_count_countna.cc | 20 ++-- src/core/expr/head_reduce_unary.cc | 173 --------------------------- 3 files changed, 18 insertions(+), 185 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 21d53b65e..83e21bb42 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -25,7 +25,7 @@ namespace dt { -template +template class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -36,13 +36,13 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { this->gby_.get_group(i, &i0, &i1); int64_t count = 0; - if (COUNTT) { + if (COUNT_ALL_ROWS) { *out = static_cast(i1 - i0); return true; } else if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); - if (COUNT){ + if (COUNT_NOT_NULL){ count = isvalid? static_cast(i1 - i0) : 0; } else { count = isvalid? 0: static_cast(i1 - i0); @@ -52,7 +52,7 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { } else { for (size_t gi = i0; gi < i1; ++gi) { bool isvalid = this->col_.get_element(gi, &value); - count += COUNT? isvalid : !isvalid; + count += COUNT_NOT_NULL? isvalid : !isvalid; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index ccac5710a..2943d6f64 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -31,7 +31,7 @@ namespace dt { namespace expr { -template +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -41,9 +41,9 @@ class FExpr_CountNA : public FExpr_Func { : arg_(std::move(arg)) {} std::string repr() const override { - std::string out = COUNT? "count" : "countna"; + std::string out = COUNT_NOT_NULL? "count" : "countna"; out += '('; - if (!COUNTT) out += arg_->repr(); + if (!COUNT_ALL_ROWS) out += arg_->repr(); out += ')'; return out; } @@ -54,6 +54,12 @@ class FExpr_CountNA : public FExpr_Func { Workframe wf = arg_->evaluate_n(ctx); Groupby gby = ctx.get_groupby(); + if (!gby && COUNT_ALL_ROWS) { + int64_t nrows = static_cast(ctx.nrows()); + Column coli = Column(new ConstInt_ColumnImpl(1, nrows, SType::INT64)); + outputs.add_column(std::move(coli), "count", Grouping::GtoONE); + return outputs; + } if (!gby) { gby = Groupby::single_group(wf.nrows()); } @@ -65,7 +71,7 @@ class FExpr_CountNA : public FExpr_Func { ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - if (COUNTT) { + if (COUNT_ALL_ROWS) { outputs.add_column(std::move(coli), "count", Grouping::GtoONE); } else { outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); @@ -109,11 +115,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 6b129f2ae..f7bceaf66 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -308,175 +308,6 @@ static Column compute_gsd(Column&& arg, const Groupby& gby) { -// //------------------------------------------------------------------------------ -// // count(A) -// //------------------------------------------------------------------------------ - -// template -// bool count_reducer(const Column& col, size_t i0, size_t i1, int64_t* out) { -// int64_t count = 0; -// for (size_t i = i0; i < i1; ++i) { -// T value; -// bool isvalid = col.get_element(i, &value); -// count += isvalid; -// } -// *out = count; -// return true; // *out is not NA -// } - - -// template -// static Column _count(Column&& arg, const Groupby& gby) { -// return Column( -// new Latent_ColumnImpl( -// new Reduced_ColumnImpl( -// SType::INT64, std::move(arg), gby, count_reducer -// ))); -// } - - -// static Column compute_count(Column&& arg, const Groupby& gby) { -// switch (arg.stype()) { -// case SType::VOID: return Column(new ConstInt_ColumnImpl( -// gby.size(), 0, SType::INT64 -// )); -// case SType::BOOL: -// case SType::INT8: return _count(std::move(arg), gby); -// case SType::INT16: return _count(std::move(arg), gby); -// case SType::DATE32: -// case SType::INT32: return _count(std::move(arg), gby); -// case SType::TIME64: -// case SType::INT64: return _count(std::move(arg), gby); -// case SType::FLOAT32: return _count(std::move(arg), gby); -// case SType::FLOAT64: return _count(std::move(arg), gby); -// case SType::STR32: -// case SType::STR64: return _count(std::move(arg), gby); -// default: throw _error("count", arg.stype()); -// } -// } - - -// //------------------------------------------------------------------------------ -// // countna -// //------------------------------------------------------------------------------ - -// template -// bool op_countna(const Column& col, size_t i0, size_t i1, int64_t* out) { -// int64_t count = 0; -// for (size_t i = i0; i < i1; ++i) { -// T value; -// bool isvalid = col.get_element(i, &value); -// count += !isvalid; -// } -// *out = count; -// return true; // *out is not NA -// } - - - -// template -// static Column _countna(Column&& arg, const Groupby& gby) { -// return Column( -// new Latent_ColumnImpl( -// new Reduced_ColumnImpl( -// SType::INT64, std::move(arg), gby, op_countna -// ))); -// } - -// static Column compute_countna(Column&& arg, const Groupby& gby) { -// switch (arg.stype()) { -// case SType::VOID: -// case SType::BOOL: -// case SType::INT8: return _countna(std::move(arg), gby); -// case SType::INT16: return _countna(std::move(arg), gby); -// case SType::DATE32: -// case SType::INT32: return _countna(std::move(arg), gby); -// case SType::TIME64: -// case SType::INT64: return _countna(std::move(arg), gby); -// case SType::FLOAT32: return _countna(std::move(arg), gby); -// case SType::FLOAT64: return _countna(std::move(arg), gby); -// case SType::STR32: -// case SType::STR64: return _countna(std::move(arg), gby); -// default: throw _error("countna", arg.stype()); -// } -// } - - -// //------------------------------------------------------------------------------ -// // count/countna(A:grouped) -// //------------------------------------------------------------------------------ - -// // T is the type of the input column -// template -// class CountGrouped_ColumnImpl : public Virtual_ColumnImpl -// { -// private: -// Column arg; -// Groupby groupby; - -// public: -// CountGrouped_ColumnImpl(Column&& col, const Groupby& grpby) -// : Virtual_ColumnImpl(grpby.size(), SType::INT64), -// arg(std::move(col)), -// groupby(grpby) {} - -// ColumnImpl* clone() const override { -// return new CountGrouped_ColumnImpl(Column(arg), groupby); -// } - -// bool get_element(size_t i, int64_t* out) const override { -// T value; -// bool isvalid = arg.get_element(i, &value); -// if (isvalid ^ NA) { -// size_t i0, i1; -// groupby.get_group(i, &i0, &i1); -// *out = static_cast(i1 - i0); -// } else { -// *out = 0; -// } -// return true; -// } - -// size_t n_children() const noexcept override { -// return 1; -// } - -// const Column& child(size_t i) const override { -// xassert(i == 0); (void)i; -// return arg; -// } - -// }; - - -// template -// static Column _gcount(Column&& arg, const Groupby& gby) { -// return Column(new CountGrouped_ColumnImpl(std::move(arg), gby)); -// } - -// template -// static Column compute_gcount(Column&& arg, const Groupby& gby) { -// switch (arg.stype()) { -// case SType::VOID: return Column(new ConstInt_ColumnImpl(1, 0, SType::INT64)); -// case SType::BOOL: -// case SType::INT8: return _gcount(std::move(arg), gby); -// case SType::INT16: return _gcount(std::move(arg), gby); -// case SType::DATE32: -// case SType::INT32: return _gcount(std::move(arg), gby); -// case SType::TIME64: -// case SType::INT64: return _gcount(std::move(arg), gby); -// case SType::FLOAT32: return _gcount(std::move(arg), gby); -// case SType::FLOAT64: return _gcount(std::move(arg), gby); -// case SType::STR32: -// case SType::STR64: return _gcount(std::move(arg), gby); -// default: throw _error("count", arg.stype()); -// } -// } - - - - - //------------------------------------------------------------------------------ // nunique(A:grouped) //------------------------------------------------------------------------------ @@ -713,8 +544,6 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_sd; break; case Op::FIRST: fn = compute_firstlast; break; case Op::LAST: fn = compute_firstlast; break; - //case Op::COUNT: fn = compute_count; break; - //case Op::COUNTNA:fn = compute_countna; break; case Op::MEDIAN: fn = compute_median; break; case Op::NUNIQUE:fn = compute_nunique; break; default: throw TypeError() << "Unknown reducer function: " @@ -725,8 +554,6 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_gsd; break; case Op::FIRST: case Op::LAST: fn = compute_gfirstlast; break; - //case Op::COUNT: fn = compute_gcount; break; - //case Op::COUNTNA:fn = compute_gcount; break; case Op::MEDIAN: fn = compute_gmedian; break; case Op::NUNIQUE:fn = compute_gnunique; break; default: throw TypeError() << "Unknown reducer function: " From fd5311a15444c411322002d5c386f6927428ccdc Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 11 Mar 2023 11:40:01 +1100 Subject: [PATCH 009/124] cleanup --- src/core/expr/fexpr_count_countna.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 2943d6f64..d8da59dbc 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -28,6 +28,7 @@ #include "expr/workframe.h" #include "python/xargs.h" #include "stype.h" +#include namespace dt { namespace expr { @@ -68,23 +69,20 @@ class FExpr_CountNA : public FExpr_Func { bool is_grouped = ctx.has_group_column( wf.get_frame_id(i), wf.get_column_id(i) - ); - + ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); if (COUNT_ALL_ROWS) { outputs.add_column(std::move(coli), "count", Grouping::GtoONE); } else { outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } - } - + } return outputs; } Column evaluate1(Column &&col, const Groupby& gby, bool is_grouped) const { SType stype = col.stype(); - switch (stype) { case SType::VOID: case SType::BOOL: From 29f5145a721842e10c43c1557d8f210854fdd94b Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 00:45:51 +1100 Subject: [PATCH 010/124] updates based on feedback --- src/core/column/count_all_rows.h | 72 ++++++++++++++++++++++++++++ src/core/column/countna.h | 21 ++++---- src/core/expr/fexpr_count_countna.cc | 43 ++++++++++------- 3 files changed, 105 insertions(+), 31 deletions(-) create mode 100644 src/core/column/count_all_rows.h diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h new file mode 100644 index 000000000..c23746c15 --- /dev/null +++ b/src/core/column/count_all_rows.h @@ -0,0 +1,72 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_COUNTALLROWS_h +#define dt_COLUMN_COUNTALLROWS_h +#include "column/virtual.h" +#include "parallel/api.h" +#include "stype.h" +namespace dt { + + +class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { + private: + Groupby gby_; + + public: + CountAllRows_ColumnImpl(const Groupby& gby) + : Virtual_ColumnImpl(gby.size(), SType::INT64), + gby_(gby) + {} + + + ColumnImpl* clone() const override { + return new CountAllRows_ColumnImpl(gby_); + } + + + size_t n_children() const noexcept override { + return 0; + } + + void materialize(Column &col_out, bool) override { + size_t nrows = gby_.size(); + const int32_t* offsets = gby_.offsets_r(); + Column col = Column::new_data_column(nrows, SType::INT64); + auto data = static_cast(col.get_data_editable()); + dt::parallel_for_dynamic(gby_.size(), + [&](size_t gi) { + for (size_t i = 0; i < nrows; ++i) { + data[i] = offsets[i + 1] - offsets[i]; + } + } + ); + + col_out = std::move(col); + } + +}; + + +} // namespace dt + + +#endif diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 83e21bb42..7752b0801 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,8 +25,8 @@ namespace dt { -template -class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -35,24 +35,19 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { size_t i0, i1; this->gby_.get_group(i, &i0, &i1); int64_t count = 0; - - if (COUNT_ALL_ROWS) { - *out = static_cast(i1 - i0); - return true; - } - else if (IS_GROUPED){ + if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); - if (COUNT_NOT_NULL){ - count = isvalid? static_cast(i1 - i0) : 0; + if (COUNTNA){ + count = isvalid? 0: static_cast(i1 - i0); } else { - count = isvalid? 0: static_cast(i1 - i0); + count = isvalid? static_cast(i1 - i0) : 0; } *out = count; - return true; + return true; // *out is not NA } else { for (size_t gi = i0; gi < i1; ++gi) { bool isvalid = this->col_.get_element(gi, &value); - count += COUNT_NOT_NULL? isvalid : !isvalid; + count += COUNTNA? !isvalid : isvalid; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index d8da59dbc..5ec8dda42 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -22,6 +22,7 @@ #include "column/const.h" #include "column/latent.h" #include "column/countna.h" +#include "column/count_all_rows.h" #include "documentation.h" #include "expr/fexpr_func.h" #include "expr/eval_context.h" @@ -32,7 +33,7 @@ namespace dt { namespace expr { -template +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -42,9 +43,9 @@ class FExpr_CountNA : public FExpr_Func { : arg_(std::move(arg)) {} std::string repr() const override { - std::string out = COUNT_NOT_NULL? "count" : "countna"; + std::string out = COUNTNA? "countna" : "count"; out += '('; - if (!COUNT_ALL_ROWS) out += arg_->repr(); + if (arg_->get_expr_kind() != Kind::None) out += arg_->repr(); out += ')'; return out; } @@ -54,13 +55,26 @@ class FExpr_CountNA : public FExpr_Func { Workframe outputs(ctx); Workframe wf = arg_->evaluate_n(ctx); Groupby gby = ctx.get_groupby(); + // this covers a scenario where + // we dont care about the presence or absence of NAs + // we just want the total number of rows + bool count_all_rows = arg_->get_expr_kind() == Kind::None; + + if (count_all_rows && !gby) { + auto value = static_cast(ctx.nrows()); + Column coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + outputs.add_column(std::move(coli), "count", Grouping::GtoONE); + return outputs; + } - if (!gby && COUNT_ALL_ROWS) { - int64_t nrows = static_cast(ctx.nrows()); - Column coli = Column(new ConstInt_ColumnImpl(1, nrows, SType::INT64)); + if (count_all_rows && gby) { + Column coli = Column(new Latent_ColumnImpl( + new CountAllRows_ColumnImpl(gby) + )); outputs.add_column(std::move(coli), "count", Grouping::GtoONE); return outputs; } + if (!gby) { gby = Groupby::single_group(wf.nrows()); } @@ -71,11 +85,7 @@ class FExpr_CountNA : public FExpr_Func { wf.get_column_id(i) ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - if (COUNT_ALL_ROWS) { - outputs.add_column(std::move(coli), "count", Grouping::GtoONE); - } else { - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); - } + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; } @@ -113,11 +123,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } @@ -126,15 +136,12 @@ class FExpr_CountNA : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { auto count = args[0].to_oobj_or_none(); - if (count.is_none()) { - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); - } - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); } static py::oobj pyfn_countna(const py::XArgs &args) { auto countna = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } From fd878f9c11849011957b63515a8f168f908750e1 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 01:07:16 +1100 Subject: [PATCH 011/124] 2022 -> 2023 copyright --- src/core/column/count_all_rows.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h index c23746c15..0022efd48 100644 --- a/src/core/column/count_all_rows.h +++ b/src/core/column/count_all_rows.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -26,7 +26,6 @@ #include "stype.h" namespace dt { - class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { private: Groupby gby_; From cb94aa97ea0b915691052c322d289ddcc59aa674 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sun, 12 Mar 2023 01:52:42 +1100 Subject: [PATCH 012/124] Remove irrelevant header file --- src/core/expr/fexpr_count_countna.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 5ec8dda42..b7efca24f 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -29,7 +29,6 @@ #include "expr/workframe.h" #include "python/xargs.h" #include "stype.h" -#include namespace dt { namespace expr { From a37e3e1d3a1395e29db869751c9fc1c59c58ff2e Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:41:00 +1100 Subject: [PATCH 013/124] code update with more shortcuts --- src/core/expr/fexpr_count_countna.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index b7efca24f..7c0c92c3c 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -82,9 +82,13 @@ class FExpr_CountNA : public FExpr_Func { bool is_grouped = ctx.has_group_column( wf.get_frame_id(i), wf.get_column_id(i) - ); - Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + ); + Column coli = wf.retrieve_column(i); + if (COUNTNA && !ctx.has_groupby() && (coli.stype() == SType::VOID)) { + int64_t nrows = static_cast(ctx.nrows()); + coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); + } else {coli = evaluate1(std::move(coli), gby, is_grouped);} + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; } From d5475337ba64f6193bd7d3ae840e466be58d6f08 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:43:16 +1100 Subject: [PATCH 014/124] remove irrelevant header files --- docs/api/dt/count.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 951a25bfb..90836703b 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -3,14 +3,14 @@ :src: src/core/expr/fexpr_count_countna.cc pyfn_count :cvar: doc_dt_count :tests: tests/test-reduce.py - :signature: count(cols) + :signature: count(cols=None) Calculate the number of non-missing values for each column from `cols`. Parameters ---------- cols: FExpr - Input columns. + Input columns. If no `cols` is passed, then the count of all rows is returned. return: Expr f-expression having one row, and the same names and number of columns From c693927ccc3a6240efc93651b7b7422f48d4afeb Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:56:04 +1100 Subject: [PATCH 015/124] code update based on feedback --- src/core/expr/fexpr_count_countna.cc | 19 +++++++------- src/core/expr/head_reduce_nullary.cc | 37 ---------------------------- 2 files changed, 9 insertions(+), 47 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 7c0c92c3c..e46c92482 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -54,22 +54,21 @@ class FExpr_CountNA : public FExpr_Func { Workframe outputs(ctx); Workframe wf = arg_->evaluate_n(ctx); Groupby gby = ctx.get_groupby(); - // this covers a scenario where + // this covers scenarios where // we dont care about the presence or absence of NAs // we just want the total number of rows bool count_all_rows = arg_->get_expr_kind() == Kind::None; - if (count_all_rows && !gby) { - auto value = static_cast(ctx.nrows()); - Column coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - outputs.add_column(std::move(coli), "count", Grouping::GtoONE); - return outputs; - } - - if (count_all_rows && gby) { - Column coli = Column(new Latent_ColumnImpl( + if (count_all_rows) { + Column coli; + if (gby){ + coli = Column(new Latent_ColumnImpl( new CountAllRows_ColumnImpl(gby) )); + } else { + auto value = static_cast(ctx.nrows()); + coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); return outputs; } diff --git a/src/core/expr/head_reduce_nullary.cc b/src/core/expr/head_reduce_nullary.cc index dd85c9662..2041fd048 100644 --- a/src/core/expr/head_reduce_nullary.cc +++ b/src/core/expr/head_reduce_nullary.cc @@ -31,53 +31,16 @@ namespace expr { -//------------------------------------------------------------------------------ -// count() -//------------------------------------------------------------------------------ - -static Column _count0(EvalContext& ctx) -{ - if (ctx.has_groupby()) { - // TODO: convert this into a virtual column - const Groupby& grpby = ctx.get_groupby(); - size_t ng = grpby.size(); - const int32_t* offsets = grpby.offsets_r(); - Column col = Column::new_data_column(ng, SType::INT64); - auto d_res = static_cast(col.get_data_editable()); - for (size_t i = 0; i < ng; ++i) { - d_res[i] = offsets[i + 1] - offsets[i]; - } - return col; - } - else { - auto value = static_cast(ctx.nrows()); - return Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } -} - - - //------------------------------------------------------------------------------ // Head_Reduce_Nullary //------------------------------------------------------------------------------ -static Workframe _wrap_column(EvalContext& ctx, Column&& col, std::string&& name) { - Workframe outputs(ctx); - outputs.add_column(std::move(col), std::move(name), Grouping::GtoONE); - return outputs; -} - - Workframe Head_Reduce_Nullary::evaluate_n( const vecExpr& args, EvalContext& ctx) const { xassert(args.size() == 0); (void) args; - switch (op) { - case Op::COUNT0: return _wrap_column(ctx, _count0(ctx), "count"); - default: break; - } throw RuntimeError() << "Unknown op " << static_cast(op) << " in Head_Reduce_Nullary"; } From 3d0f9d4f5c698b1474ead6acbd65ed364de419c4 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:58:51 +1100 Subject: [PATCH 016/124] add more details for count in docs --- docs/api/dt/count.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 90836703b..a69bb3863 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -5,7 +5,8 @@ :tests: tests/test-reduce.py :signature: count(cols=None) - Calculate the number of non-missing values for each column from `cols`. + Calculate the number of non-missing values for each column from `cols`, if `cols` is provided, + or the total number of rows if `cols` is not provided. Parameters ---------- From 8270fa21c059a3ad2ec20ccad3a0b22547e9a643 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 12:05:28 +1100 Subject: [PATCH 017/124] countna must have an iterable --- src/core/expr/fexpr_count_countna.cc | 2 +- src/datatable/expr/reduce.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e46c92482..c9d8d0d98 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -59,7 +59,7 @@ class FExpr_CountNA : public FExpr_Func { // we just want the total number of rows bool count_all_rows = arg_->get_expr_kind() == Kind::None; - if (count_all_rows) { + if (count_all_rows && !COUNTNA) { Column coli; if (gby){ coli = Column(new Latent_ColumnImpl( diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index e094654a6..163790871 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -54,7 +54,7 @@ def nunique(iterable=None): return Expr(OpCodes.NUNIQUE, (iterable,)) -def countna(iterable=None): +def countna(iterable): return core.countna(iterable) From f52a618f374dc3f778985e0225ca09925a0076ad Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 21:26:17 +1100 Subject: [PATCH 018/124] add test for void countna --- tests/dt/test-countna.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index f8660f41d..10e9ff8f6 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -74,3 +74,9 @@ def test_dt_count_na2(): EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) RES = DT[:, [dt.countna(f.V), dt.countna(dt.mean(f.V))], dt.by(f.G)] assert EXP.to_list() == RES.to_list() + + +def test_dt_countna_void(): + DT = dt.Frame([None]) + RES = DT[:, dt.countna(f.C0), dt.by(f.C0)] + EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) \ No newline at end of file From 18de8cfd81074c7229db2ac7c75d8434ea4bc60e Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 09:27:05 +1100 Subject: [PATCH 019/124] update based on feedback --- src/core/expr/fexpr_count_countna.cc | 20 ++++++++++---------- src/datatable/expr/reduce.py | 11 ++++++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index c9d8d0d98..39faae5ea 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -99,22 +99,22 @@ class FExpr_CountNA : public FExpr_Func { case SType::VOID: case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::DATE32: case SType::INT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::TIME64: case SType::INT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -122,14 +122,14 @@ class FExpr_CountNA : public FExpr_Func { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 163790871..a9a5070a0 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -44,10 +44,15 @@ def count(iterable=None): - if isinstance(iterable, (Expr, core.FExpr)) or (iterable is None): + if iterable is None: return core.count(iterable) - else: - return _builtin_sum((x is not None) for x in iterable) + if (not isinstance(iterable, dict) + and (isinstance(iterable, core.FExpr) + or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): + return core.count(iterable) + if isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): + return core.count(iterable) + return _builtin_sum((x is not None) for x in iterable) def nunique(iterable=None): From 83d7e89b064af62b5cc4c0b652e40b94f1cfd10f Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 09:30:10 +1100 Subject: [PATCH 020/124] defensive steps for empty value --- src/datatable/expr/reduce.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a9a5070a0..54f715283 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -110,7 +110,7 @@ def corr(col1, col2): def sum(iterable, start=0): if (not isinstance(iterable, dict) and (isinstance(iterable, core.FExpr) - or (hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): + or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): return core.sum(iterable) elif isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): return core.sum(iterable) @@ -123,7 +123,7 @@ def sum(iterable, start=0): def min(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.min(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.min(args) @@ -137,7 +137,7 @@ def min(*args, **kwds): def max(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.max(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.max(args) From 51201778d8eb1e09e3a8d06ff8c053241348a102 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 10:20:11 +1100 Subject: [PATCH 021/124] update based on feedback --- src/core/column/countna.h | 10 +++++----- src/core/expr/fexpr_count_countna.cc | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 7752b0801..1de6205ba 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,12 +25,12 @@ namespace dt { -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, U* out) const override { + bool get_element(size_t i, int64_t* out) const override { T value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); @@ -38,9 +38,9 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); if (COUNTNA){ - count = isvalid? 0: static_cast(i1 - i0); + count = isvalid? 0: static_cast(i1 - i0); } else { - count = isvalid? static_cast(i1 - i0) : 0; + count = isvalid? static_cast(i1 - i0) : 0; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 39faae5ea..b94f4c119 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -125,11 +125,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } From 334c6985869f44cc93c541178453776cf68760a0 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:42:42 +1100 Subject: [PATCH 022/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index b94f4c119..e9c7b6475 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -86,7 +86,9 @@ class FExpr_CountNA : public FExpr_Func { if (COUNTNA && !ctx.has_groupby() && (coli.stype() == SType::VOID)) { int64_t nrows = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); - } else {coli = evaluate1(std::move(coli), gby, is_grouped);} + } else { + coli = evaluate1(std::move(coli), gby, is_grouped); + } outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; From 5f341faa7c1d1907ccc4f153f7501faf203ec1db Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:43:43 +1100 Subject: [PATCH 023/124] Update src/core/column/countna.h Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/column/countna.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 1de6205ba..0fd88d036 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -41,7 +41,7 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { count = isvalid? 0: static_cast(i1 - i0); } else { count = isvalid? static_cast(i1 - i0) : 0; - } + } *out = count; return true; // *out is not NA } else { From cba820408e0629d2e9bceae1a15643145f9ec813 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:43:51 +1100 Subject: [PATCH 024/124] Update src/core/column/countna.h Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/column/countna.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 0fd88d036..821118ea0 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -38,7 +38,7 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); if (COUNTNA){ - count = isvalid? 0: static_cast(i1 - i0); + count = isvalid? 0 : static_cast(i1 - i0); } else { count = isvalid? static_cast(i1 - i0) : 0; } From 11ced22620e659f9f67dc95cc395d423b4fe483f Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:44:19 +1100 Subject: [PATCH 025/124] Update src/core/column/countna.h Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/column/countna.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 821118ea0..f84afee1a 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -45,13 +45,13 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { *out = count; return true; // *out is not NA } else { - for (size_t gi = i0; gi < i1; ++gi) { - bool isvalid = this->col_.get_element(gi, &value); - count += COUNTNA? !isvalid : isvalid; - } - *out = count; - return true; // *out is not NA + for (size_t gi = i0; gi < i1; ++gi) { + bool isvalid = this->col_.get_element(gi, &value); + count += COUNTNA? !isvalid : isvalid; } + *out = count; + return true; // *out is not NA + } } }; From 0a2f5bef31c4d55e0dfb709787b1f46a83f70615 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:44:35 +1100 Subject: [PATCH 026/124] Update docs/api/dt/count.rst Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/count.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index a69bb3863..f791554a0 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -5,8 +5,8 @@ :tests: tests/test-reduce.py :signature: count(cols=None) - Calculate the number of non-missing values for each column from `cols`, if `cols` is provided, - or the total number of rows if `cols` is not provided. + Calculate the number of non-missing values for each column from `cols`. When `cols` is not provided, + calculate the total number of rows. Parameters ---------- From 8077bbae23c4e6e38d3ffa2956d0805f836c1bb1 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:45:40 +1100 Subject: [PATCH 027/124] Update docs/api/dt/count.rst Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/count.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index f791554a0..e57af256d 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -11,7 +11,7 @@ Parameters ---------- cols: FExpr - Input columns. If no `cols` is passed, then the count of all rows is returned. + Input columns if any. return: Expr f-expression having one row, and the same names and number of columns From b1c7f983a17e1187d5717b98fcc59be16afeda8b Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 17:21:39 +1100 Subject: [PATCH 028/124] more descriptive template variables --- src/core/column/countna.h | 8 ++++---- src/core/column/minmax.h | 8 ++++---- src/core/column/reduce_unary.h | 6 +++--- src/core/column/sumprod.h | 8 ++++---- src/core/expr/fexpr_count_countna.cc | 6 +++--- src/core/expr/fexpr_mean.cc | 6 +++--- src/core/expr/fexpr_sumprod.cc | 6 +++--- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index f84afee1a..5c9bba6bb 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,13 +25,13 @@ namespace dt { -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, int64_t* out) const override { - T value; + T_IN value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); int64_t count = 0; diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 95fbd0599..a339e67ef 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -35,14 +35,14 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { // initially being set to `true`. So the default value here // only silences the compiler warning and makes the update // to happen a little bit faster, but it has no effect on the final result. - T res = MIN ? std::numeric_limits::max() - : std::numeric_limits::min(); + T_IN res = MIN ? std::numeric_limits::max() + : std::numeric_limits::min(); bool res_isna = true; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); for (size_t gi = i0; gi < i1; ++gi) { - T value; + T_IN value; bool isvalid = this->col_.get_element(gi, &value); if (MIN) { if (isvalid && (value < res || res_isna)) { @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 3eeb39255..1dbe95552 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -26,7 +26,7 @@ namespace dt { -template +template class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; @@ -35,11 +35,11 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { public: ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), stype_from), + : Virtual_ColumnImpl(gby.size(), stype_from), col_(std::move(col)), gby_(gby) { - xassert(col_.can_be_read_as()); + xassert(col_.can_be_read_as()); } diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index 10bb3a074..c461996b6 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -31,9 +31,9 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, U* out) const override { - T result = !SUM; // 0 for `sum()` and 1 for `prod()` - T value; + bool get_element(size_t i, T_OUT* out) const override { + T_IN result = !SUM; // 0 for `sum()` and 1 for `prod()` + T_IN value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); @@ -41,7 +41,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { size_t nrows = i1 - i0; bool is_valid = this->col_.get_element(i, &value); if (is_valid){ - result = SUM? static_cast(nrows) * value + result = SUM? static_cast(nrows) * value : ipow(value, nrows); } } else { diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e9c7b6475..5bbab5d7a 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -124,14 +124,14 @@ class FExpr_CountNA : public FExpr_Func { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 162417367..1b9b360ea 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -85,14 +85,14 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), gby ))); } diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 4daf57b84..4c0a8c911 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -67,14 +67,14 @@ class FExpr_SumProd : public FExpr_ReduceUnary { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } From 175ed0085447c69bcb48f6765cfd4c825a665775 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 17:28:43 +1100 Subject: [PATCH 029/124] single template type for min/max/sum/prod --- src/core/column/minmax.h | 2 +- src/core/column/sumprod.h | 2 +- src/core/expr/fexpr_sumprod.cc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index a339e67ef..93dce5fe1 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index c461996b6..4055e4e2e 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -31,7 +31,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, T_OUT* out) const override { + bool get_element(size_t i, T_IN* out) const override { T_IN result = !SUM; // 0 for `sum()` and 1 for `prod()` T_IN value; size_t i0, i1; diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 4c0a8c911..e452f8da2 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -70,11 +70,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } From 6131f699261473d41c4410447d2a96e0d3fdd21d Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 20:28:42 +1100 Subject: [PATCH 030/124] count all rows use unary impl --- src/core/column/count_all_rows.h | 46 +++++----------------------- src/core/expr/fexpr_count_countna.cc | 11 +++---- tests/dt/test-countna.py | 3 +- 3 files changed, 14 insertions(+), 46 deletions(-) diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h index 0022efd48..6831b6338 100644 --- a/src/core/column/count_all_rows.h +++ b/src/core/column/count_all_rows.h @@ -21,51 +21,21 @@ //------------------------------------------------------------------------------ #ifndef dt_COLUMN_COUNTALLROWS_h #define dt_COLUMN_COUNTALLROWS_h -#include "column/virtual.h" -#include "parallel/api.h" -#include "stype.h" +#include "column/reduce_unary.h" namespace dt { -class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { - private: - Groupby gby_; +class CountAllRows_ColumnImpl : public ReduceUnary_ColumnImpl { public: - CountAllRows_ColumnImpl(const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), SType::INT64), - gby_(gby) - {} + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - - ColumnImpl* clone() const override { - return new CountAllRows_ColumnImpl(gby_); - } - - - size_t n_children() const noexcept override { - return 0; + bool get_element(size_t i, int64_t* out) const override { + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + *out = static_cast(i1 - i0); + return true; } - - void materialize(Column &col_out, bool) override { - size_t nrows = gby_.size(); - const int32_t* offsets = gby_.offsets_r(); - Column col = Column::new_data_column(nrows, SType::INT64); - auto data = static_cast(col.get_data_editable()); - dt::parallel_for_dynamic(gby_.size(), - [&](size_t gi) { - for (size_t i = 0; i < nrows; ++i) { - data[i] = offsets[i + 1] - offsets[i]; - } - } - ); - - col_out = std::move(col); - } - }; - } // namespace dt - - #endif diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 5bbab5d7a..bf4f50cc7 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -62,10 +62,8 @@ class FExpr_CountNA : public FExpr_Func { if (count_all_rows && !COUNTNA) { Column coli; if (gby){ - coli = Column(new Latent_ColumnImpl( - new CountAllRows_ColumnImpl(gby) - )); - } else { + coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(wf.retrieve_column(0), gby))); + } else{ auto value = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } @@ -87,8 +85,8 @@ class FExpr_CountNA : public FExpr_Func { int64_t nrows = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); } else { - coli = evaluate1(std::move(coli), gby, is_grouped); - } + coli = evaluate1(std::move(coli), gby, is_grouped); + } outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; @@ -123,7 +121,6 @@ class FExpr_CountNA : public FExpr_Func { } } - template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 10e9ff8f6..a5235a610 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -79,4 +79,5 @@ def test_dt_count_na2(): def test_dt_countna_void(): DT = dt.Frame([None]) RES = DT[:, dt.countna(f.C0), dt.by(f.C0)] - EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) \ No newline at end of file + EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) + From ba50e3e702f2275ce7eaeb865de00f45562895be Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 20:51:20 +1100 Subject: [PATCH 031/124] create dummy column for count --- src/core/expr/fexpr_count_countna.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index bf4f50cc7..d158b1779 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -61,10 +61,12 @@ class FExpr_CountNA : public FExpr_Func { if (count_all_rows && !COUNTNA) { Column coli; + auto value = static_cast(ctx.nrows()); if (gby){ - coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(wf.retrieve_column(0), gby))); + coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); + coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); } else{ - auto value = static_cast(ctx.nrows()); + coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); From 3cc0ddfd228d0f5acbf31ebc40359a59bb3e2ad5 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 20:51:45 +1100 Subject: [PATCH 032/124] remove whitespace --- src/core/expr/fexpr_count_countna.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index d158b1779..979b083d5 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -65,8 +65,7 @@ class FExpr_CountNA : public FExpr_Func { if (gby){ coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); - } else{ - + } else{ coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); From 171289bb0869385f4f325efabf84abb4a54db4eb Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 09:22:36 +1100 Subject: [PATCH 033/124] add countna when cols is None --- src/core/expr/fexpr_count_countna.cc | 6 ++++++ src/datatable/expr/reduce.py | 2 +- tests/dt/test-countna.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 979b083d5..e7a802036 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -76,6 +76,12 @@ class FExpr_CountNA : public FExpr_Func { gby = Groupby::single_group(wf.nrows()); } + if (count_all_rows && COUNTNA){ + Column coli = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); + outputs.add_column(std::move(coli), std::string(), Grouping::GtoONE); + return outputs; + } + for (size_t i = 0; i < wf.ncols(); ++i) { bool is_grouped = ctx.has_group_column( wf.get_frame_id(i), diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 54f715283..0c9d0fa21 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -59,7 +59,7 @@ def nunique(iterable=None): return Expr(OpCodes.NUNIQUE, (iterable,)) -def countna(iterable): +def countna(iterable=None): return core.countna(iterable) diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index a5235a610..79148ebcb 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -81,3 +81,14 @@ def test_dt_countna_void(): RES = DT[:, dt.countna(f.C0), dt.by(f.C0)] EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) +def test_dt_countna_None_by(): + DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) + EXP = dt.Frame(G=[1,2], C0=[0,0]) + RES = DT[:, dt.countna(), f.G] + assert EXP.to_list() == RES.to_list() + +def test_dt_countna_None(): + DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) + EXP = dt.Frame(C0=[0]) + RES = DT[:, dt.countna()] + assert EXP.to_list() == RES.to_list() \ No newline at end of file From a6ad80c4a5d7d32e1e0bcb22a4c3d2a8c87b7f7d Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 09:26:08 +1100 Subject: [PATCH 034/124] add more details for count and countna --- docs/api/dt/count.rst | 2 ++ docs/api/dt/countna.rst | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index e57af256d..58ab320a4 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -16,6 +16,8 @@ return: Expr f-expression having one row, and the same names and number of columns as in `cols`. All the returned column stypes are `int64`. + If `cols` is not provided, the total number of rows + (a combination of the count of missing and non-missing values) is returned. except: TypeError The exception is raised when one of the columns from `cols` diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index fafbc0f1e..932fbab5d 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -7,7 +7,7 @@ .. x-version-added:: 1.1.0 - Count the number of NA values for each column from `cols`. + Count the number of NA values for each column from `cols`. Parameters ---------- @@ -17,6 +17,7 @@ return: Expr f-expression having one row, and the same names and number of columns as in `cols`. All the returned column stypes are `int64`. + If `cols` is not provided, 0 is returned per group. except: TypeError The exception is raised when one of the columns from `cols` From 8394ef14f159f22226d508697a2c186923ee6813 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 20:59:23 +1100 Subject: [PATCH 035/124] update countna --- docs/api/dt/count.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 58ab320a4..d8ec3b7d6 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -68,4 +68,4 @@ See Also -------- - - :func:`sum()` -- function to calculate the sum of values. + - :func:`counta()` -- function to count the number of missing values. From 9b297dcdfb38f6738608b2afc3d1e1c3e21378a8 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 22:06:22 +1100 Subject: [PATCH 036/124] fix countna docs link --- docs/api/dt/count.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index d8ec3b7d6..a62f60537 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -68,4 +68,4 @@ See Also -------- - - :func:`counta()` -- function to count the number of missing values. + - :func:`countna()` -- function to count the number of missing values. From 3f958c9bf588eacb8c32e10b240f14690469b71c Mon Sep 17 00:00:00 2001 From: samukweku Date: Thu, 16 Mar 2023 09:10:25 +1100 Subject: [PATCH 037/124] update counta logic for Frame vs FExpr --- src/datatable/expr/reduce.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 0c9d0fa21..b9ffd884b 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -60,6 +60,8 @@ def nunique(iterable=None): def countna(iterable=None): + if isinstance(iterable, core.Frame): + return iterable.countna() return core.countna(iterable) From 316a71b1728bf7e7dbc7450b5deb8323ed6f81ea Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Fri, 17 Mar 2023 09:56:10 +1100 Subject: [PATCH 038/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e7a802036..02ad8e6ff 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -92,8 +92,8 @@ class FExpr_CountNA : public FExpr_Func { int64_t nrows = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); } else { - coli = evaluate1(std::move(coli), gby, is_grouped); - } + coli = evaluate1(std::move(coli), gby, is_grouped); + } outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; From b2c4ea54d4e757ec9844121426430de7aa7e863f Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Fri, 17 Mar 2023 09:56:18 +1100 Subject: [PATCH 039/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 02ad8e6ff..4402d9f15 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -66,8 +66,8 @@ class FExpr_CountNA : public FExpr_Func { coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); } else{ - coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } + coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); return outputs; } From 0f9e60e9a1f63718b77404be0823535d12122db0 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 17 Mar 2023 21:20:14 +1100 Subject: [PATCH 040/124] update count_all_rows to avoid dummy col creation --- src/core/column/count_all_rows.h | 6 ++-- src/core/column/reduce_nullary.h | 53 ++++++++++++++++++++++++++++ src/core/expr/fexpr_count_countna.cc | 3 +- 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 src/core/column/reduce_nullary.h diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h index 6831b6338..de0a1a334 100644 --- a/src/core/column/count_all_rows.h +++ b/src/core/column/count_all_rows.h @@ -21,13 +21,13 @@ //------------------------------------------------------------------------------ #ifndef dt_COLUMN_COUNTALLROWS_h #define dt_COLUMN_COUNTALLROWS_h -#include "column/reduce_unary.h" +#include "column/reduce_nullary.h" namespace dt { -class CountAllRows_ColumnImpl : public ReduceUnary_ColumnImpl { +class CountAllRows_ColumnImpl : public ReduceNullary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceNullary_ColumnImpl::ReduceNullary_ColumnImpl; bool get_element(size_t i, int64_t* out) const override { size_t i0, i1; diff --git a/src/core/column/reduce_nullary.h b/src/core/column/reduce_nullary.h new file mode 100644 index 000000000..1019b3595 --- /dev/null +++ b/src/core/column/reduce_nullary.h @@ -0,0 +1,53 @@ +//------------------------------------------------------------------------------ +// Copyright 2023 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_REDUCE_NULLARY_h +#define dt_COLUMN_REDUCE_NULLARY_h +#include "column/virtual.h" +#include "stype.h" +namespace dt { + + +class ReduceNullary_ColumnImpl : public Virtual_ColumnImpl { + protected: + Groupby gby_; + + + public: + ReduceNullary_ColumnImpl(const Groupby& gby, SType stype) + : Virtual_ColumnImpl(gby.size(), stype), + gby_(gby) + {} + + + ColumnImpl *clone() const override { + return new ReduceNullary_ColumnImpl(Groupby(gby_), this->stype()); + } + + size_t n_children() const noexcept override { + return 0; + } + +}; + + +} // namespace dt +#endif diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 4402d9f15..f987d4583 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -63,8 +63,7 @@ class FExpr_CountNA : public FExpr_Func { Column coli; auto value = static_cast(ctx.nrows()); if (gby){ - coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); - coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); + coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby, SType::INT64))); } else{ coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } From e7ad7c1be02c3ac8b960a9bb554a15611d311b7e Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 18 Mar 2023 12:48:20 +1100 Subject: [PATCH 041/124] cast inplace for sumprod --- src/core/expr/fexpr_sumprod.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index e452f8da2..814a36dd5 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,6 +55,7 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: + col.cast_inplace(SType::INT64); return make(std::move(col), gby, is_grouped); case SType::FLOAT32: return make(std::move(col), gby, is_grouped); From 22fbcd73f062a7dafe16dc1df127eb228e7b05bf Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 18 Mar 2023 15:53:15 +1100 Subject: [PATCH 042/124] restore cast in place for sumprod --- src/core/expr/fexpr_sumprod.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 814a36dd5..4e985059c 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,12 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - col.cast_inplace(SType::INT64); - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -69,7 +68,8 @@ class FExpr_SumProd : public FExpr_ReduceUnary { template - Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby From 6a45afdb7ac2fd794bc1e624680525ea1c6bc8f7 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 19 Mar 2023 22:12:28 +1100 Subject: [PATCH 043/124] add explicit stype to reduce_unary.h --- src/core/column/reduce_unary.h | 6 +++--- src/core/expr/fexpr_count_countna.cc | 20 ++++++++++---------- src/core/expr/fexpr_mean.cc | 25 +++++++++++-------------- src/core/expr/fexpr_minmax.cc | 4 ++-- src/core/expr/fexpr_sumprod.cc | 4 ++-- tests/dt/test-countna.py | 2 +- 6 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 1dbe95552..a9f966b69 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -34,8 +34,8 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { public: - ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), stype_from), + ReduceUnary_ColumnImpl(Column &&col, SType stype, const Groupby& gby) + : Virtual_ColumnImpl(gby.size(), stype), col_(std::move(col)), gby_(gby) { @@ -44,7 +44,7 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { ColumnImpl *clone() const override { - return new ReduceUnary_ColumnImpl(Column(col_), Groupby(gby_)); + return new ReduceUnary_ColumnImpl(Column(col_), this->stype(), Groupby(gby_)); } diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index f987d4583..eadbe99cf 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -105,22 +105,22 @@ class FExpr_CountNA : public FExpr_Func { case SType::VOID: case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::DATE32: case SType::INT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::TIME64: case SType::INT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -128,14 +128,14 @@ class FExpr_CountNA : public FExpr_Func { } template - Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { if (is_grouped) { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } else { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 1b9b360ea..9c24af174 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -52,24 +52,20 @@ class FExpr_Mean : public FExpr_ReduceUnary { )); case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); case SType::INT32: - return make(std::move(col), gby, is_grouped); case SType::INT64: - return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::DATE32: { - Column coli = make(std::move(col), gby, is_grouped); + Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); coli.cast_inplace(SType::DATE32); return coli; } case SType::TIME64: { - Column coli = make(std::move(col), gby, is_grouped); + Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); coli.cast_inplace(SType::TIME64); return coli; } @@ -85,15 +81,16 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template - Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + template + Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + col.cast_inplace(stype); if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), gby + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), gby + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), stype, gby ))); } } diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 3a5faf5f6..9fd81fa35 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -52,7 +52,7 @@ class FExpr_MinMax : public FExpr_ReduceUnary { return Column(new ConstNa_ColumnImpl(gby.size(), stype)); case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT8, gby, is_grouped); case SType::INT16: return make(std::move(col), gby, is_grouped); case SType::INT32: @@ -62,7 +62,7 @@ class FExpr_MinMax : public FExpr_ReduceUnary { case SType::TIME64: return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: return make(std::move(col), gby, is_grouped); default: diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 4e985059c..5284e1fc6 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } } diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 79148ebcb..4b085e35e 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -68,7 +68,7 @@ def test_dt_count_na1(src): RES = df[:, dt.countna(f[:])] assert_equals(EXP, RES) - +@pytest.mark.xfail(reason="commented out till #3417 is resolved.") def test_dt_count_na2(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) From 254954e4cdf90ddf5f2ad7e924d1457e5944c56d Mon Sep 17 00:00:00 2001 From: samukweku Date: Mon, 20 Mar 2023 09:27:26 +1100 Subject: [PATCH 044/124] rename template parameter for single type --- src/core/column/minmax.h | 8 ++++---- src/core/expr/fexpr_mean.cc | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 93dce5fe1..9bfdb2905 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -35,14 +35,14 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { // initially being set to `true`. So the default value here // only silences the compiler warning and makes the update // to happen a little bit faster, but it has no effect on the final result. - T_IN res = MIN ? std::numeric_limits::max() - : std::numeric_limits::min(); + T res = MIN ? std::numeric_limits::max() + : std::numeric_limits::min(); bool res_isna = true; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); for (size_t gi = i0; gi < i1; ++gi) { - T_IN value; + T value; bool isvalid = this->col_.get_element(gi, &value); if (MIN) { if (isvalid && (value < res || res_isna)) { @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 9c24af174..22d1c3820 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -81,15 +81,15 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template + template Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), stype, gby ))); } From 178843858e785607216397a7388a5e90fc64709a Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 24 Mar 2023 23:15:45 +1100 Subject: [PATCH 045/124] update code to use FExpr_ReduceUnary --- src/core/column/count_all_rows.h | 41 ------ src/core/column/countna.h | 8 +- .../{reduce_nullary.h => countna_no_args.h} | 24 ++-- src/core/column/mean.h | 6 +- src/core/column/minmax.h | 4 +- src/core/column/reduce_unary.h | 2 +- src/core/column/sumprod.h | 12 +- src/core/expr/fexpr_count_countna.cc | 134 ++++++++---------- src/core/expr/fexpr_mean.cc | 38 ++--- src/core/expr/fexpr_minmax.cc | 14 +- src/core/expr/fexpr_sumprod.cc | 10 +- src/datatable/expr/reduce.py | 2 +- 12 files changed, 122 insertions(+), 173 deletions(-) delete mode 100644 src/core/column/count_all_rows.h rename src/core/column/{reduce_nullary.h => countna_no_args.h} (74%) diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h deleted file mode 100644 index de0a1a334..000000000 --- a/src/core/column/count_all_rows.h +++ /dev/null @@ -1,41 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#ifndef dt_COLUMN_COUNTALLROWS_h -#define dt_COLUMN_COUNTALLROWS_h -#include "column/reduce_nullary.h" -namespace dt { - - -class CountAllRows_ColumnImpl : public ReduceNullary_ColumnImpl { - public: - using ReduceNullary_ColumnImpl::ReduceNullary_ColumnImpl; - - bool get_element(size_t i, int64_t* out) const override { - size_t i0, i1; - this->gby_.get_group(i, &i0, &i1); - *out = static_cast(i1 - i0); - return true; - } -}; - -} // namespace dt -#endif diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 5c9bba6bb..c836866a7 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,13 +25,13 @@ namespace dt { -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, int64_t* out) const override { - T_IN value; + T value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); int64_t count = 0; diff --git a/src/core/column/reduce_nullary.h b/src/core/column/countna_no_args.h similarity index 74% rename from src/core/column/reduce_nullary.h rename to src/core/column/countna_no_args.h index 1019b3595..0e6b1b405 100644 --- a/src/core/column/reduce_nullary.h +++ b/src/core/column/countna_no_args.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -19,35 +19,43 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS // IN THE SOFTWARE. //------------------------------------------------------------------------------ -#ifndef dt_COLUMN_REDUCE_NULLARY_h -#define dt_COLUMN_REDUCE_NULLARY_h +#ifndef dt_COLUMN_COUNTNA_ALLROWS_h +#define dt_COLUMN_COUNTNA_ALLROWS_h #include "column/virtual.h" #include "stype.h" namespace dt { -class ReduceNullary_ColumnImpl : public Virtual_ColumnImpl { +class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { protected: Groupby gby_; - public: - ReduceNullary_ColumnImpl(const Groupby& gby, SType stype) - : Virtual_ColumnImpl(gby.size(), stype), + CountAllRows_ColumnImpl(const Groupby& gby) + : Virtual_ColumnImpl(gby.size(), SType::INT64), gby_(gby) {} ColumnImpl *clone() const override { - return new ReduceNullary_ColumnImpl(Groupby(gby_), this->stype()); + return new CountAllRows_ColumnImpl(Groupby(gby_)); } size_t n_children() const noexcept override { return 0; } + + bool get_element(size_t i, int64_t* out) const override { + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + *out = static_cast(i1 - i0); + return true; + } }; + + } // namespace dt #endif diff --git a/src/core/column/mean.h b/src/core/column/mean.h index e3863ca95..f89b52d10 100644 --- a/src/core/column/mean.h +++ b/src/core/column/mean.h @@ -26,9 +26,9 @@ namespace dt { template -class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { +class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, T* out) const override { T value; @@ -53,4 +53,4 @@ class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt -#endif +#endif \ No newline at end of file diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 9bfdb2905..8a8b5a5db 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -26,9 +26,9 @@ namespace dt { template -class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { +class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, T* out) const override { // res` will be updated on the first valid element, due to `res_isna` diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index a9f966b69..ec58ba745 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -26,7 +26,7 @@ namespace dt { -template +template class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index 4055e4e2e..05c2e7423 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -27,13 +27,13 @@ namespace dt { template -class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { +class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, T_IN* out) const override { - T_IN result = !SUM; // 0 for `sum()` and 1 for `prod()` - T_IN value; + bool get_element(size_t i, T* out) const override { + T result = !SUM; // 0 for `sum()` and 1 for `prod()` + T value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); @@ -41,7 +41,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { size_t nrows = i1 - i0; bool is_valid = this->col_.get_element(i, &value); if (is_valid){ - result = SUM? static_cast(nrows) * value + result = SUM? static_cast(nrows) * value : ipow(value, nrows); } } else { diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index eadbe99cf..b6e2129b5 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai +// Copyright 2022-2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -22,9 +22,9 @@ #include "column/const.h" #include "column/latent.h" #include "column/countna.h" -#include "column/count_all_rows.h" +#include "column/countna_no_args.h" #include "documentation.h" -#include "expr/fexpr_func.h" +#include "expr/fexpr_reduce_unary.h" #include "expr/eval_context.h" #include "expr/workframe.h" #include "python/xargs.h" @@ -33,73 +33,17 @@ namespace dt { namespace expr { template -class FExpr_CountNA : public FExpr_Func { - private: - ptrExpr arg_; - +class FExpr_CountNA : public FExpr_ReduceUnary { public: - FExpr_CountNA(ptrExpr &&arg) - : arg_(std::move(arg)) {} - - std::string repr() const override { - std::string out = COUNTNA? "countna" : "count"; - out += '('; - if (arg_->get_expr_kind() != Kind::None) out += arg_->repr(); - out += ')'; - return out; - } - - - Workframe evaluate_n(EvalContext &ctx) const override { - Workframe outputs(ctx); - Workframe wf = arg_->evaluate_n(ctx); - Groupby gby = ctx.get_groupby(); - // this covers scenarios where - // we dont care about the presence or absence of NAs - // we just want the total number of rows - bool count_all_rows = arg_->get_expr_kind() == Kind::None; - - if (count_all_rows && !COUNTNA) { - Column coli; - auto value = static_cast(ctx.nrows()); - if (gby){ - coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby, SType::INT64))); - } else{ - coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } - outputs.add_column(std::move(coli), "count", Grouping::GtoONE); - return outputs; - } + using FExpr_ReduceUnary::FExpr_ReduceUnary; - if (!gby) { - gby = Groupby::single_group(wf.nrows()); - } - - if (count_all_rows && COUNTNA){ - Column coli = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); - outputs.add_column(std::move(coli), std::string(), Grouping::GtoONE); - return outputs; - } - for (size_t i = 0; i < wf.ncols(); ++i) { - bool is_grouped = ctx.has_group_column( - wf.get_frame_id(i), - wf.get_column_id(i) - ); - Column coli = wf.retrieve_column(i); - if (COUNTNA && !ctx.has_groupby() && (coli.stype() == SType::VOID)) { - int64_t nrows = static_cast(ctx.nrows()); - coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); - } else { - coli = evaluate1(std::move(coli), gby, is_grouped); - } - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); - } - return outputs; + std::string name() const override { + return COUNTNA? "countna" + : "count"; } - - Column evaluate1(Column &&col, const Groupby& gby, bool is_grouped) const { + Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped) const { SType stype = col.stype(); switch (stype) { case SType::VOID: @@ -127,27 +71,74 @@ class FExpr_CountNA : public FExpr_Func { } } - template - Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + template + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), stype, gby ))); } } }; + +// gets the count of all rows - nulls are not checked +template +class FExpr_CountNA_AllRows : public FExpr_Func { + public: + FExpr_CountNA_AllRows(){} + + std::string repr() const override { + std::string out = COUNTNA ? "countna(None)" + : "count()"; + return out; + } + + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe wf(ctx); + Groupby gby = ctx.get_groupby(); + Column col; + + if (!gby) { + gby = Groupby::single_group(wf.nrows()); + } + + if (COUNTNA) { + col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); + wf.add_column(std::move(col), std::string(), Grouping::GtoONE); + return wf; + } + + if (ctx.has_groupby()) { + col = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby))); + } else { + auto value = static_cast(ctx.nrows()); + col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } + wf.add_column(std::move(col), "count", Grouping::GtoONE); + return wf; + } + +}; + + static py::oobj pyfn_count(const py::XArgs &args) { auto count = args[0].to_oobj_or_none(); + if (count.is_none()) { + return PyFExpr::make(new FExpr_CountNA_AllRows()); + } return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); } static py::oobj pyfn_countna(const py::XArgs &args) { - auto countna = args[0].to_oobj(); + auto countna = args[0].to_oobj_or_none(); + if (countna.is_none()) { + return PyFExpr::make(new FExpr_CountNA_AllRows()); + } return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } @@ -163,8 +154,7 @@ DECLARE_PYFN(&pyfn_countna) ->name("countna") ->docs(doc_dt_countna) ->arg_names({"cols"}) - ->n_positional_args(1) - ->n_required_args(1); + ->n_positional_args(1); }} // dt::expr diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 22d1c3820..baf2b472b 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -53,22 +53,16 @@ class FExpr_Mean : public FExpr_ReduceUnary { case SType::BOOL: case SType::INT8: case SType::INT16: - case SType::INT32: + case SType::INT32: case SType::INT64: + case SType::DATE32: + case SType::TIME64: case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); + break; case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); - case SType::DATE32: { - Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); - coli.cast_inplace(SType::DATE32); - return coli; - } - case SType::TIME64: { - Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); - coli.cast_inplace(SType::TIME64); - return coli; - } + col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); + break; default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -82,17 +76,13 @@ class FExpr_Mean : public FExpr_ReduceUnary { template - Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); - if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby - ))); - } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby - ))); - } + + return is_grouped? std::move(col) + : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), stype, gby + ))); } }; @@ -110,4 +100,4 @@ DECLARE_PYFN(&pyfn_mean) ->n_required_args(1); -}} // dt::expr +}} // dt::expr \ No newline at end of file diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 9fd81fa35..0bba8253e 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -54,17 +54,19 @@ class FExpr_MinMax : public FExpr_ReduceUnary { case SType::INT8: return make(std::move(col), SType::INT8, gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT16, gby, is_grouped); case SType::INT32: + return make(std::move(col), SType::INT32, gby, is_grouped); case SType::DATE32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::DATE32, gby, is_grouped); case SType::INT64: + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::TIME64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::TIME64, gby, is_grouped); case SType::FLOAT32: return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -73,10 +75,10 @@ class FExpr_MinMax : public FExpr_ReduceUnary { template - Column make(Column&& col, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new MinMax_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 5284e1fc6..7f68895d9 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -67,15 +67,15 @@ class FExpr_SumProd : public FExpr_ReduceUnary { } - template - Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + template + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), stype, gby ))); } @@ -110,4 +110,4 @@ DECLARE_PYFN(&pyfn_prod) ->n_required_args(1); -}} // dt::expr +}} // dt::expr \ No newline at end of file diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index b9ffd884b..2b726857d 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -109,7 +109,7 @@ def corr(col1, col2): # noinspection PyShadowingBuiltins -def sum(iterable, start=0): +def sum(iterable=None, start=0): if (not isinstance(iterable, dict) and (isinstance(iterable, core.FExpr) or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): From c6f65d433e71bd939fa04bd06b8b7fdc5e482cd5 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 25 Mar 2023 18:19:25 +1100 Subject: [PATCH 046/124] add example for countna when no col is passed --- docs/api/dt/countna.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 932fbab5d..44c7f4a87 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -65,6 +65,16 @@ 0 | 2 [1 row x 1 column] + Get the count if no col is passed: + + >>> df[:, dt.countna()] + | C0 + | int64 + -- + ----- + 0 | 0 + [1 row x 1 column] + + See Also From 120e02f451cb8c44e98369effb1888558078ad2c Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sat, 1 Apr 2023 13:29:16 +1100 Subject: [PATCH 047/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index b6e2129b5..4cf17226b 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -116,8 +116,8 @@ class FExpr_CountNA_AllRows : public FExpr_Func { if (ctx.has_groupby()) { col = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby))); } else { - auto value = static_cast(ctx.nrows()); - col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + auto value = static_cast(ctx.nrows()); + col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } wf.add_column(std::move(col), "count", Grouping::GtoONE); return wf; From 14a70479ba6fd75cacf4966b2d8829f82f2d353b Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sat, 1 Apr 2023 13:29:43 +1100 Subject: [PATCH 048/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 4cf17226b..a25a9ee68 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -109,7 +109,7 @@ class FExpr_CountNA_AllRows : public FExpr_Func { if (COUNTNA) { col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); - wf.add_column(std::move(col), std::string(), Grouping::GtoONE); + wf.add_column(std::move(col), "countna", Grouping::GtoONE); return wf; } From d3266a145c647903b2e4714f6b203633f4277d3e Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:17:33 +1100 Subject: [PATCH 049/124] updates based on feedback --- src/core/column/mean.h | 3 +- src/core/expr/fexpr_count_countna.cc | 44 ++++++++++++++-------------- src/core/expr/fexpr_mean.cc | 8 ++--- src/core/expr/fexpr_sumprod.cc | 13 ++++---- tests/dt/test-countna.py | 3 +- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/core/column/mean.h b/src/core/column/mean.h index f89b52d10..b5010696c 100644 --- a/src/core/column/mean.h +++ b/src/core/column/mean.h @@ -53,4 +53,5 @@ class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt -#endif \ No newline at end of file +#endif + diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index a25a9ee68..cf3920ac8 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -33,7 +33,7 @@ namespace dt { namespace expr { template -class FExpr_CountNA : public FExpr_ReduceUnary { +class FExpr_Count : public FExpr_ReduceUnary { public: using FExpr_ReduceUnary::FExpr_ReduceUnary; @@ -49,22 +49,22 @@ class FExpr_CountNA : public FExpr_ReduceUnary { case SType::VOID: case SType::BOOL: case SType::INT8: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::DATE32: case SType::INT32: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::TIME64: case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -72,14 +72,14 @@ class FExpr_CountNA : public FExpr_ReduceUnary { } template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), stype, gby + std::move(col), SType::INT64, gby ))); } else { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), stype, gby + std::move(col), SType::INT64, gby ))); } } @@ -88,13 +88,13 @@ class FExpr_CountNA : public FExpr_ReduceUnary { // gets the count of all rows - nulls are not checked template -class FExpr_CountNA_AllRows : public FExpr_Func { +class FExpr_Count_Rows : public FExpr_Func { public: - FExpr_CountNA_AllRows(){} + FExpr_Count_Rows(){} std::string repr() const override { - std::string out = COUNTNA ? "countna(None)" - : "count()"; + std::string out = COUNTNA? "countna(None)" + : "count()"; return out; } @@ -127,19 +127,19 @@ class FExpr_CountNA_AllRows : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { - auto count = args[0].to_oobj_or_none(); - if (count.is_none()) { + auto arg = args[0].to_oobj_or_none(); + if (arg.is_none()) { return PyFExpr::make(new FExpr_CountNA_AllRows()); } - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); } static py::oobj pyfn_countna(const py::XArgs &args) { - auto countna = args[0].to_oobj_or_none(); - if (countna.is_none()) { - return PyFExpr::make(new FExpr_CountNA_AllRows()); + auto arg = args[0].to_oobj_or_none(); + if (arg.is_none()) { + return PyFExpr::make(new FExpr_Count_Rows()); } - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index baf2b472b..3a0f2c73b 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -58,10 +58,10 @@ class FExpr_Mean : public FExpr_ReduceUnary { case SType::DATE32: case SType::TIME64: case SType::FLOAT64: - col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); + col_out = make(std::move(col), gby, is_grouped); break; case SType::FLOAT32: - col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); + col_out = make(std::move(col), gby, is_grouped); break; default: throw TypeError() @@ -76,12 +76,12 @@ class FExpr_Mean : public FExpr_ReduceUnary { template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } }; diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 7f68895d9..f5f93cee5 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,11 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } } @@ -110,4 +110,5 @@ DECLARE_PYFN(&pyfn_prod) ->n_required_args(1); -}} // dt::expr \ No newline at end of file +}} // dt::expr + diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 4b085e35e..186dcc582 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -91,4 +91,5 @@ def test_dt_countna_None(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) EXP = dt.Frame(C0=[0]) RES = DT[:, dt.countna()] - assert EXP.to_list() == RES.to_list() \ No newline at end of file + assert EXP.to_list() == RES.to_list() + From 2436af61ff3b072a4ac544104f8cd99598576f55 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:19:46 +1100 Subject: [PATCH 050/124] remove xfail for countna --- tests/dt/test-countna.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 186dcc582..17ad66d54 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -68,7 +68,6 @@ def test_dt_count_na1(src): RES = df[:, dt.countna(f[:])] assert_equals(EXP, RES) -@pytest.mark.xfail(reason="commented out till #3417 is resolved.") def test_dt_count_na2(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) @@ -93,3 +92,4 @@ def test_dt_countna_None(): RES = DT[:, dt.countna()] assert EXP.to_list() == RES.to_list() + From 5207e67d4ab175064aab30a150aa57d55dea0d6b Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:32:56 +1100 Subject: [PATCH 051/124] cleanup --- src/core/column/countna_no_args.h | 6 +++--- src/core/expr/fexpr_count_countna.cc | 4 ++-- src/core/expr/fexpr_mean.cc | 10 +++++----- src/core/expr/fexpr_sumprod.cc | 11 +++++------ 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/core/column/countna_no_args.h b/src/core/column/countna_no_args.h index 0e6b1b405..5766de944 100644 --- a/src/core/column/countna_no_args.h +++ b/src/core/column/countna_no_args.h @@ -26,19 +26,19 @@ namespace dt { -class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { +class CountRows_ColumnImpl : public Virtual_ColumnImpl { protected: Groupby gby_; public: - CountAllRows_ColumnImpl(const Groupby& gby) + CountRows_ColumnImpl(const Groupby& gby) : Virtual_ColumnImpl(gby.size(), SType::INT64), gby_(gby) {} ColumnImpl *clone() const override { - return new CountAllRows_ColumnImpl(Groupby(gby_)); + return new CountRows_ColumnImpl(Groupby(gby_)); } size_t n_children() const noexcept override { diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index cf3920ac8..3aacbb1a4 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -114,7 +114,7 @@ class FExpr_Count_Rows : public FExpr_Func { } if (ctx.has_groupby()) { - col = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby))); + col = Column(new Latent_ColumnImpl(new CountRows_ColumnImpl(gby))); } else { auto value = static_cast(ctx.nrows()); col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); @@ -129,7 +129,7 @@ class FExpr_Count_Rows : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { auto arg = args[0].to_oobj_or_none(); if (arg.is_none()) { - return PyFExpr::make(new FExpr_CountNA_AllRows()); + return PyFExpr::make(new FExpr_Count_Rows()); } return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 3a0f2c73b..99c70cb4e 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -58,10 +58,10 @@ class FExpr_Mean : public FExpr_ReduceUnary { case SType::DATE32: case SType::TIME64: case SType::FLOAT64: - col_out = make(std::move(col), gby, is_grouped); + col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); break; case SType::FLOAT32: - col_out = make(std::move(col), gby, is_grouped); + col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); break; default: throw TypeError() @@ -76,12 +76,12 @@ class FExpr_Mean : public FExpr_ReduceUnary { template - Column make(Column&& col, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), stype, gby ))); } }; @@ -100,4 +100,4 @@ DECLARE_PYFN(&pyfn_mean) ->n_required_args(1); -}} // dt::expr \ No newline at end of file +}} // dt::expr diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index f5f93cee5..0a5205666 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,11 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), stype, gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), stype, gby ))); } } @@ -111,4 +111,3 @@ DECLARE_PYFN(&pyfn_prod) }} // dt::expr - From 88efc58575668d745d6b3f4e614408bc6e148411 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:40:12 +1100 Subject: [PATCH 052/124] updates based on feedback --- src/core/expr/fexpr_minmax.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 0bba8253e..5feda06b5 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -52,21 +52,21 @@ class FExpr_MinMax : public FExpr_ReduceUnary { return Column(new ConstNa_ColumnImpl(gby.size(), stype)); case SType::BOOL: case SType::INT8: - return make(std::move(col), SType::INT8, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), SType::INT16, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT32: - return make(std::move(col), SType::INT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::DATE32: - return make(std::move(col), SType::DATE32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::TIME64: - return make(std::move(col), SType::TIME64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -75,10 +75,10 @@ class FExpr_MinMax : public FExpr_ReduceUnary { template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, const Groupby& gby, bool is_grouped) const { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new MinMax_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } From fcbdd797ec4c006ef524ab061682a98209242bd0 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 2 Apr 2023 13:32:41 +1000 Subject: [PATCH 053/124] no need to check for gby --- src/core/expr/fexpr_count_countna.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 3aacbb1a4..e110fcde5 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -103,10 +103,6 @@ class FExpr_Count_Rows : public FExpr_Func { Groupby gby = ctx.get_groupby(); Column col; - if (!gby) { - gby = Groupby::single_group(wf.nrows()); - } - if (COUNTNA) { col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); wf.add_column(std::move(col), "countna", Grouping::GtoONE); From ac39c5f6efb47098ca448be662ad91599cdb9043 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 2 Apr 2023 13:33:52 +1000 Subject: [PATCH 054/124] fix indent --- src/core/expr/fexpr_count_countna.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e110fcde5..6a863aa80 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -112,9 +112,9 @@ class FExpr_Count_Rows : public FExpr_Func { if (ctx.has_groupby()) { col = Column(new Latent_ColumnImpl(new CountRows_ColumnImpl(gby))); } else { - auto value = static_cast(ctx.nrows()); - col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } + auto value = static_cast(ctx.nrows()); + col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } wf.add_column(std::move(col), "count", Grouping::GtoONE); return wf; } From 2c567f597081d05e0ae6dfca3f00c8a467210e6a Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 18 Apr 2023 09:05:51 +1000 Subject: [PATCH 055/124] Update docs/api/dt/countna.rst Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/countna.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 44c7f4a87..3c3ed4cf3 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -65,7 +65,7 @@ 0 | 2 [1 row x 1 column] - Get the count if no col is passed: + When no `cols` is passed, the number of missing values returned is zero: >>> df[:, dt.countna()] | C0 From 52be77fb0e935866b37ef756868375bb89018d5d Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 18 Apr 2023 09:06:32 +1000 Subject: [PATCH 056/124] focus only on count --- src/datatable/expr/reduce.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 2b726857d..d696abac1 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -109,17 +109,17 @@ def corr(col1, col2): # noinspection PyShadowingBuiltins -def sum(iterable=None, start=0): +def sum(iterable, start=0): if (not isinstance(iterable, dict) and (isinstance(iterable, core.FExpr) - or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): + or (hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): return core.sum(iterable) elif isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): return core.sum(iterable) elif isinstance(iterable, core.Frame): return iterable.sum() else: - return _builtin_sum(iterable, start) + return _builtin_sum(iterable, start) # noinspection PyShadowingBuiltins def min(*args, **kwds): From adf7143a6806b9cb9cbad18deb58e21ac98c8317 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 18 Apr 2023 09:07:23 +1000 Subject: [PATCH 057/124] return FExpr --- docs/api/dt/countna.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 3c3ed4cf3..0faa835e7 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -14,7 +14,7 @@ cols: FExpr Input columns. - return: Expr + return: FExpr f-expression having one row, and the same names and number of columns as in `cols`. All the returned column stypes are `int64`. If `cols` is not provided, 0 is returned per group. From 109c76fcb2024ae28f983ecb4b561d0d0077d6df Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 18 Apr 2023 16:31:59 +1000 Subject: [PATCH 058/124] Update countna.h --- src/core/column/countna.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index c836866a7..7b488162c 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -57,3 +57,4 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt #endif + From 8b5ac15f46ac4590f464a1a0d98137e8fb2ba482 Mon Sep 17 00:00:00 2001 From: samukweku Date: Thu, 9 Mar 2023 21:00:48 +1100 Subject: [PATCH 059/124] changes to allow more flexibility for reduction operations --- src/core/column/minmax.h | 2 +- src/core/column/reduce_unary.h | 5 ++-- src/core/column/sumprod.h | 2 +- src/core/expr/fexpr_mean.cc | 43 ++++++++++++++++++++++------------ src/core/expr/fexpr_sumprod.cc | 13 +++++----- 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 9bfdb2905..95fbd0599 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 08bbdf8d0..3eeb39255 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -26,15 +26,16 @@ namespace dt { -template +template class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; Groupby gby_; + public: ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), col.stype()), + : Virtual_ColumnImpl(gby.size(), stype_from), col_(std::move(col)), gby_(gby) { diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index d709a3a6f..10bb3a074 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -31,7 +31,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, T* out) const override { + bool get_element(size_t i, U* out) const override { T result = !SUM; // 0 for `sum()` and 1 for `prod()` T value; size_t i0, i1; diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 716de2182..162417367 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -52,17 +52,27 @@ class FExpr_Mean : public FExpr_ReduceUnary { )); case SType::BOOL: case SType::INT8: + return make(std::move(col), gby, is_grouped); case SType::INT16: - case SType::INT32: + return make(std::move(col), gby, is_grouped); + case SType::INT32: + return make(std::move(col), gby, is_grouped); case SType::INT64: - case SType::DATE32: - case SType::TIME64: + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); - break; + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); - break; + return make(std::move(col), gby, is_grouped); + case SType::DATE32: { + Column coli = make(std::move(col), gby, is_grouped); + coli.cast_inplace(SType::DATE32); + return coli; + } + case SType::TIME64: { + Column coli = make(std::move(col), gby, is_grouped); + coli.cast_inplace(SType::TIME64); + return coli; + } default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -75,14 +85,17 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { - col.cast_inplace(stype); - - return is_grouped? std::move(col) - : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), gby - ))); + template + Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + if (is_grouped) { + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), gby + ))); + } else { + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), gby + ))); + } } }; diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index ab1adc0bc..4daf57b84 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,11 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -68,14 +68,13 @@ class FExpr_SumProd : public FExpr_ReduceUnary { template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { - col.cast_inplace(stype); + Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } From f4c65f0609c66c75f1b4f42ad108add68ab0bbdf Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 05:07:44 +1100 Subject: [PATCH 060/124] add countna with additional tests --- src/core/column/countna.h | 56 ++++++++++++++ src/core/expr/fexpr_countna.cc | 134 +++++++++++++++++++++++++++++++++ src/datatable/expr/expr.py | 1 - src/datatable/expr/reduce.py | 2 +- tests/dt/test-countna.py | 2 +- tests/types/test-void.py | 4 + 6 files changed, 196 insertions(+), 3 deletions(-) create mode 100644 src/core/column/countna.h create mode 100644 src/core/expr/fexpr_countna.cc diff --git a/src/core/column/countna.h b/src/core/column/countna.h new file mode 100644 index 000000000..d4ac14205 --- /dev/null +++ b/src/core/column/countna.h @@ -0,0 +1,56 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_COUNTNA_h +#define dt_COLUMN_COUNTNA_h +#include "column/reduce_unary.h" +namespace dt { + + +template +class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { + public: + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + + bool get_element(size_t i, U* out) const override { + T value; + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + int64_t count = 0; + + if (IS_GROUPED){ + bool isvalid = this->col_.get_element(i, &value); + count = isvalid? 0: static_cast(i1 - i0); + *out = count; + return true; + } else { + for (size_t gi = i0; gi < i1; ++gi) { + bool isvalid = this->col_.get_element(gi, &value); + count += !isvalid; + } + *out = count; + return true; // *out is not NA + } + } +}; + +} // namespace dt +#endif diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_countna.cc new file mode 100644 index 000000000..6cf4aa493 --- /dev/null +++ b/src/core/expr/fexpr_countna.cc @@ -0,0 +1,134 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#include "column/const.h" +#include "column/latent.h" +#include "column/countna.h" +#include "documentation.h" +#include "expr/fexpr_func.h" +#include "expr/eval_context.h" +#include "expr/workframe.h" +#include "python/xargs.h" +#include "stype.h" +namespace dt { +namespace expr { + + +class FExpr_CountNA : public FExpr_Func { + private: + ptrExpr arg_; + + public: + FExpr_CountNA(ptrExpr &&arg) + : arg_(std::move(arg)) {} + + std::string repr() const override { + std::string out = "countna"; + out += '('; + out += arg_->repr(); + out += ')'; + return out; + } + + + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe outputs(ctx); + Workframe wf = arg_->evaluate_n(ctx); + Groupby gby = ctx.get_groupby(); + + if (!gby) { + gby = Groupby::single_group(wf.nrows()); + } + + for (size_t i = 0; i < wf.ncols(); ++i) { + bool is_grouped = ctx.has_group_column( + wf.get_frame_id(i), + wf.get_column_id(i) + ); + + Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } + + return outputs; + } + + + Column evaluate1(Column &&col, const Groupby& gby, bool is_grouped) const { + SType stype = col.stype(); + + switch (stype) { + case SType::VOID: + case SType::BOOL: + case SType::INT8: + return make(std::move(col), gby, is_grouped); + case SType::INT16: + return make(std::move(col), gby, is_grouped); + case SType::DATE32: + case SType::INT32: + return make(std::move(col), gby, is_grouped); + case SType::TIME64: + case SType::INT64: + return make(std::move(col), gby, is_grouped); + case SType::FLOAT32: + return make(std::move(col), gby, is_grouped); + case SType::FLOAT64: + return make(std::move(col), gby, is_grouped); + case SType::STR32: + case SType::STR64: + return make(std::move(col), gby, is_grouped); + default: + throw TypeError() + << "Invalid column of type `" << stype << "` in " << repr(); + } + } + + + template + Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + if (is_grouped) { + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + std::move(col), gby + ))); + } else { + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + std::move(col), gby + ))); + } + } +}; + + + +static py::oobj pyfn_countna(const py::XArgs &args) { + auto countna = args[0].to_oobj(); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); +} + +DECLARE_PYFN(&pyfn_countna) + ->name("countna") + ->docs(doc_dt_countna) + ->arg_names({"cols"}) + ->n_positional_args(1) + ->n_required_args(1); + + +}} // dt::expr diff --git a/src/datatable/expr/expr.py b/src/datatable/expr/expr.py index a91782d4b..e5a342a68 100644 --- a/src/datatable/expr/expr.py +++ b/src/datatable/expr/expr.py @@ -59,7 +59,6 @@ class OpCodes(enum.Enum): MEDIAN = 410 COV = 411 CORR = 412 - COUNTNA = 413 NUNIQUE = 414 # Math: trigonometric diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a8ee39bfe..06c4d9ffd 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -57,7 +57,7 @@ def nunique(iterable=None): def countna(iterable=None): - return Expr(OpCodes.COUNTNA, (iterable,)) + return core.countna(iterable) def first(iterable): diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index b30b4a8b9..f8660f41d 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -71,6 +71,6 @@ def test_dt_count_na1(src): def test_dt_count_na2(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) - EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[3,0]) + EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) RES = DT[:, [dt.countna(f.V), dt.countna(dt.mean(f.V))], dt.by(f.G)] assert EXP.to_list() == RES.to_list() diff --git a/tests/types/test-void.py b/tests/types/test-void.py index d41845c89..da36e3040 100644 --- a/tests/types/test-void.py +++ b/tests/types/test-void.py @@ -216,6 +216,10 @@ def test_groupby_void_reducer(): DT = dt.Frame([None] * 5)[:, dt.count(), dt.by(0)] assert_equals(DT, dt.Frame(C0=[None], count=[5]/dt.int64)) +def test_groupby_void_countna(): + DT = dt.Frame([None] * 5)[:, dt.countna(f[0]), dt.by(0)] + assert_equals(DT, dt.Frame(C0=[None], C1=[5]/dt.int64)) + def test_groupby_void_twice(): # See issue #3108 From 2063deadda2ddcca44c689b5526a090046986610 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 05:09:27 +1100 Subject: [PATCH 061/124] update countna doc link --- docs/api/dt/countna.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 078b4bb14..ff8570516 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -1,6 +1,6 @@ .. xfunction:: datatable.countna - :src: src/core/expr/head_reduce_unary.cc op_countna + :src: src/core/expr/fexpr_minmax.cc pyfn_countna :tests: tests/test-reduce.py :cvar: doc_dt_countna :signature: countna(cols) From e21e08ab28514fb6b7bbd8dad89b41a4f4e4bea6 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 21:20:30 +1100 Subject: [PATCH 062/124] add count fexpr --- src/core/column/countna.h | 11 +- src/core/expr/fexpr_countna.cc | 23 +- src/core/expr/head_reduce_unary.cc | 336 ++++++++++++++--------------- src/datatable/expr/reduce.py | 2 +- 4 files changed, 194 insertions(+), 178 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index d4ac14205..a42cc8431 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,7 +25,7 @@ namespace dt { -template +template class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -38,13 +38,18 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); - count = isvalid? 0: static_cast(i1 - i0); + if (COUNT){ + count = isvalid? static_cast(i1 - i0) : 0; + } else { + count = isvalid? 0: static_cast(i1 - i0); + } + *out = count; return true; } else { for (size_t gi = i0; gi < i1; ++gi) { bool isvalid = this->col_.get_element(gi, &value); - count += !isvalid; + count += COUNT? isvalid : !isvalid; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_countna.cc index 6cf4aa493..7f6d48e54 100644 --- a/src/core/expr/fexpr_countna.cc +++ b/src/core/expr/fexpr_countna.cc @@ -31,7 +31,7 @@ namespace dt { namespace expr { - +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -41,7 +41,7 @@ class FExpr_CountNA : public FExpr_Func { : arg_(std::move(arg)) {} std::string repr() const override { - std::string out = "countna"; + std::string out = COUNT? "count" : "countna"; out += '('; out += arg_->repr(); out += ')'; @@ -105,24 +105,35 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } } }; - +static py::oobj pyfn_count(const py::XArgs &args) { + auto count = args[0].to_oobj_or_none(); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); +} static py::oobj pyfn_countna(const py::XArgs &args) { auto countna = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } + +DECLARE_PYFN(&pyfn_count) + ->name("count") + ->docs(doc_dt_count) + ->arg_names({"cols"}) + ->n_positional_args(1); + + DECLARE_PYFN(&pyfn_countna) ->name("countna") ->docs(doc_dt_countna) diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 48cbb6200..6b129f2ae 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -308,170 +308,170 @@ static Column compute_gsd(Column&& arg, const Groupby& gby) { -//------------------------------------------------------------------------------ -// count(A) -//------------------------------------------------------------------------------ - -template -bool count_reducer(const Column& col, size_t i0, size_t i1, int64_t* out) { - int64_t count = 0; - for (size_t i = i0; i < i1; ++i) { - T value; - bool isvalid = col.get_element(i, &value); - count += isvalid; - } - *out = count; - return true; // *out is not NA -} - - -template -static Column _count(Column&& arg, const Groupby& gby) { - return Column( - new Latent_ColumnImpl( - new Reduced_ColumnImpl( - SType::INT64, std::move(arg), gby, count_reducer - ))); -} - - -static Column compute_count(Column&& arg, const Groupby& gby) { - switch (arg.stype()) { - case SType::VOID: return Column(new ConstInt_ColumnImpl( - gby.size(), 0, SType::INT64 - )); - case SType::BOOL: - case SType::INT8: return _count(std::move(arg), gby); - case SType::INT16: return _count(std::move(arg), gby); - case SType::DATE32: - case SType::INT32: return _count(std::move(arg), gby); - case SType::TIME64: - case SType::INT64: return _count(std::move(arg), gby); - case SType::FLOAT32: return _count(std::move(arg), gby); - case SType::FLOAT64: return _count(std::move(arg), gby); - case SType::STR32: - case SType::STR64: return _count(std::move(arg), gby); - default: throw _error("count", arg.stype()); - } -} - - -//------------------------------------------------------------------------------ -// countna -//------------------------------------------------------------------------------ - -template -bool op_countna(const Column& col, size_t i0, size_t i1, int64_t* out) { - int64_t count = 0; - for (size_t i = i0; i < i1; ++i) { - T value; - bool isvalid = col.get_element(i, &value); - count += !isvalid; - } - *out = count; - return true; // *out is not NA -} - - - -template -static Column _countna(Column&& arg, const Groupby& gby) { - return Column( - new Latent_ColumnImpl( - new Reduced_ColumnImpl( - SType::INT64, std::move(arg), gby, op_countna - ))); -} - -static Column compute_countna(Column&& arg, const Groupby& gby) { - switch (arg.stype()) { - case SType::VOID: - case SType::BOOL: - case SType::INT8: return _countna(std::move(arg), gby); - case SType::INT16: return _countna(std::move(arg), gby); - case SType::DATE32: - case SType::INT32: return _countna(std::move(arg), gby); - case SType::TIME64: - case SType::INT64: return _countna(std::move(arg), gby); - case SType::FLOAT32: return _countna(std::move(arg), gby); - case SType::FLOAT64: return _countna(std::move(arg), gby); - case SType::STR32: - case SType::STR64: return _countna(std::move(arg), gby); - default: throw _error("countna", arg.stype()); - } -} - - -//------------------------------------------------------------------------------ -// count/countna(A:grouped) -//------------------------------------------------------------------------------ - -// T is the type of the input column -template -class CountGrouped_ColumnImpl : public Virtual_ColumnImpl -{ - private: - Column arg; - Groupby groupby; - - public: - CountGrouped_ColumnImpl(Column&& col, const Groupby& grpby) - : Virtual_ColumnImpl(grpby.size(), SType::INT64), - arg(std::move(col)), - groupby(grpby) {} - - ColumnImpl* clone() const override { - return new CountGrouped_ColumnImpl(Column(arg), groupby); - } - - bool get_element(size_t i, int64_t* out) const override { - T value; - bool isvalid = arg.get_element(i, &value); - if (isvalid ^ NA) { - size_t i0, i1; - groupby.get_group(i, &i0, &i1); - *out = static_cast(i1 - i0); - } else { - *out = 0; - } - return true; - } - - size_t n_children() const noexcept override { - return 1; - } - - const Column& child(size_t i) const override { - xassert(i == 0); (void)i; - return arg; - } - -}; - - -template -static Column _gcount(Column&& arg, const Groupby& gby) { - return Column(new CountGrouped_ColumnImpl(std::move(arg), gby)); -} - -template -static Column compute_gcount(Column&& arg, const Groupby& gby) { - switch (arg.stype()) { - case SType::VOID: return Column(new ConstInt_ColumnImpl(1, 0, SType::INT64)); - case SType::BOOL: - case SType::INT8: return _gcount(std::move(arg), gby); - case SType::INT16: return _gcount(std::move(arg), gby); - case SType::DATE32: - case SType::INT32: return _gcount(std::move(arg), gby); - case SType::TIME64: - case SType::INT64: return _gcount(std::move(arg), gby); - case SType::FLOAT32: return _gcount(std::move(arg), gby); - case SType::FLOAT64: return _gcount(std::move(arg), gby); - case SType::STR32: - case SType::STR64: return _gcount(std::move(arg), gby); - default: throw _error("count", arg.stype()); - } -} +// //------------------------------------------------------------------------------ +// // count(A) +// //------------------------------------------------------------------------------ + +// template +// bool count_reducer(const Column& col, size_t i0, size_t i1, int64_t* out) { +// int64_t count = 0; +// for (size_t i = i0; i < i1; ++i) { +// T value; +// bool isvalid = col.get_element(i, &value); +// count += isvalid; +// } +// *out = count; +// return true; // *out is not NA +// } + + +// template +// static Column _count(Column&& arg, const Groupby& gby) { +// return Column( +// new Latent_ColumnImpl( +// new Reduced_ColumnImpl( +// SType::INT64, std::move(arg), gby, count_reducer +// ))); +// } + + +// static Column compute_count(Column&& arg, const Groupby& gby) { +// switch (arg.stype()) { +// case SType::VOID: return Column(new ConstInt_ColumnImpl( +// gby.size(), 0, SType::INT64 +// )); +// case SType::BOOL: +// case SType::INT8: return _count(std::move(arg), gby); +// case SType::INT16: return _count(std::move(arg), gby); +// case SType::DATE32: +// case SType::INT32: return _count(std::move(arg), gby); +// case SType::TIME64: +// case SType::INT64: return _count(std::move(arg), gby); +// case SType::FLOAT32: return _count(std::move(arg), gby); +// case SType::FLOAT64: return _count(std::move(arg), gby); +// case SType::STR32: +// case SType::STR64: return _count(std::move(arg), gby); +// default: throw _error("count", arg.stype()); +// } +// } + + +// //------------------------------------------------------------------------------ +// // countna +// //------------------------------------------------------------------------------ + +// template +// bool op_countna(const Column& col, size_t i0, size_t i1, int64_t* out) { +// int64_t count = 0; +// for (size_t i = i0; i < i1; ++i) { +// T value; +// bool isvalid = col.get_element(i, &value); +// count += !isvalid; +// } +// *out = count; +// return true; // *out is not NA +// } + + + +// template +// static Column _countna(Column&& arg, const Groupby& gby) { +// return Column( +// new Latent_ColumnImpl( +// new Reduced_ColumnImpl( +// SType::INT64, std::move(arg), gby, op_countna +// ))); +// } + +// static Column compute_countna(Column&& arg, const Groupby& gby) { +// switch (arg.stype()) { +// case SType::VOID: +// case SType::BOOL: +// case SType::INT8: return _countna(std::move(arg), gby); +// case SType::INT16: return _countna(std::move(arg), gby); +// case SType::DATE32: +// case SType::INT32: return _countna(std::move(arg), gby); +// case SType::TIME64: +// case SType::INT64: return _countna(std::move(arg), gby); +// case SType::FLOAT32: return _countna(std::move(arg), gby); +// case SType::FLOAT64: return _countna(std::move(arg), gby); +// case SType::STR32: +// case SType::STR64: return _countna(std::move(arg), gby); +// default: throw _error("countna", arg.stype()); +// } +// } + + +// //------------------------------------------------------------------------------ +// // count/countna(A:grouped) +// //------------------------------------------------------------------------------ + +// // T is the type of the input column +// template +// class CountGrouped_ColumnImpl : public Virtual_ColumnImpl +// { +// private: +// Column arg; +// Groupby groupby; + +// public: +// CountGrouped_ColumnImpl(Column&& col, const Groupby& grpby) +// : Virtual_ColumnImpl(grpby.size(), SType::INT64), +// arg(std::move(col)), +// groupby(grpby) {} + +// ColumnImpl* clone() const override { +// return new CountGrouped_ColumnImpl(Column(arg), groupby); +// } + +// bool get_element(size_t i, int64_t* out) const override { +// T value; +// bool isvalid = arg.get_element(i, &value); +// if (isvalid ^ NA) { +// size_t i0, i1; +// groupby.get_group(i, &i0, &i1); +// *out = static_cast(i1 - i0); +// } else { +// *out = 0; +// } +// return true; +// } + +// size_t n_children() const noexcept override { +// return 1; +// } + +// const Column& child(size_t i) const override { +// xassert(i == 0); (void)i; +// return arg; +// } + +// }; + + +// template +// static Column _gcount(Column&& arg, const Groupby& gby) { +// return Column(new CountGrouped_ColumnImpl(std::move(arg), gby)); +// } + +// template +// static Column compute_gcount(Column&& arg, const Groupby& gby) { +// switch (arg.stype()) { +// case SType::VOID: return Column(new ConstInt_ColumnImpl(1, 0, SType::INT64)); +// case SType::BOOL: +// case SType::INT8: return _gcount(std::move(arg), gby); +// case SType::INT16: return _gcount(std::move(arg), gby); +// case SType::DATE32: +// case SType::INT32: return _gcount(std::move(arg), gby); +// case SType::TIME64: +// case SType::INT64: return _gcount(std::move(arg), gby); +// case SType::FLOAT32: return _gcount(std::move(arg), gby); +// case SType::FLOAT64: return _gcount(std::move(arg), gby); +// case SType::STR32: +// case SType::STR64: return _gcount(std::move(arg), gby); +// default: throw _error("count", arg.stype()); +// } +// } @@ -713,8 +713,8 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_sd; break; case Op::FIRST: fn = compute_firstlast; break; case Op::LAST: fn = compute_firstlast; break; - case Op::COUNT: fn = compute_count; break; - case Op::COUNTNA:fn = compute_countna; break; + //case Op::COUNT: fn = compute_count; break; + //case Op::COUNTNA:fn = compute_countna; break; case Op::MEDIAN: fn = compute_median; break; case Op::NUNIQUE:fn = compute_nunique; break; default: throw TypeError() << "Unknown reducer function: " @@ -725,8 +725,8 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_gsd; break; case Op::FIRST: case Op::LAST: fn = compute_gfirstlast; break; - case Op::COUNT: fn = compute_gcount; break; - case Op::COUNTNA:fn = compute_gcount; break; + //case Op::COUNT: fn = compute_gcount; break; + //case Op::COUNTNA:fn = compute_gcount; break; case Op::MEDIAN: fn = compute_gmedian; break; case Op::NUNIQUE:fn = compute_gnunique; break; default: throw TypeError() << "Unknown reducer function: " diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 06c4d9ffd..f692d6c81 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -45,7 +45,7 @@ def count(iterable=None): if isinstance(iterable, (Expr, core.FExpr)): - return Expr(OpCodes.COUNT, (iterable,)) + return core.count(iterable) elif iterable is None: return Expr(OpCodes.COUNT0, ()) else: From 633e3b19ee11d404608d0e30807ab03c36139617 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 22:02:25 +1100 Subject: [PATCH 063/124] add count for all rows --- src/core/column/countna.h | 8 ++++++-- src/core/expr/fexpr_countna.cc | 23 +++++++++++++++-------- src/datatable/expr/reduce.py | 3 ++- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index a42cc8431..8b506875f 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,7 +25,7 @@ namespace dt { -template +template class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -36,7 +36,11 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { this->gby_.get_group(i, &i0, &i1); int64_t count = 0; - if (IS_GROUPED){ + if (COUNTT) { + *out = static_cast(i1 - i0); + return true; + } + else if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); if (COUNT){ count = isvalid? static_cast(i1 - i0) : 0; diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_countna.cc index 7f6d48e54..ccac5710a 100644 --- a/src/core/expr/fexpr_countna.cc +++ b/src/core/expr/fexpr_countna.cc @@ -31,7 +31,7 @@ namespace dt { namespace expr { -template +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -43,7 +43,7 @@ class FExpr_CountNA : public FExpr_Func { std::string repr() const override { std::string out = COUNT? "count" : "countna"; out += '('; - out += arg_->repr(); + if (!COUNTT) out += arg_->repr(); out += ')'; return out; } @@ -64,8 +64,12 @@ class FExpr_CountNA : public FExpr_Func { wf.get_column_id(i) ); - Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); + if (COUNTT) { + outputs.add_column(std::move(coli), "count", Grouping::GtoONE); + } else { + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } } return outputs; @@ -105,11 +109,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } @@ -118,12 +122,15 @@ class FExpr_CountNA : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { auto count = args[0].to_oobj_or_none(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + if (count.is_none()) { + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + } + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); } static py::oobj pyfn_countna(const py::XArgs &args) { auto countna = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index f692d6c81..72f219a7f 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -47,7 +47,8 @@ def count(iterable=None): if isinstance(iterable, (Expr, core.FExpr)): return core.count(iterable) elif iterable is None: - return Expr(OpCodes.COUNT0, ()) + return core.count(iterable) + #return Expr(OpCodes.COUNT0, ()) else: return _builtin_sum((x is not None) for x in iterable) From 6dea1a6922415f56655b2c3c9d3653a3a9061f0e Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 22:09:04 +1100 Subject: [PATCH 064/124] simplify logic choice --- src/datatable/expr/reduce.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 72f219a7f..e094654a6 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -44,11 +44,8 @@ def count(iterable=None): - if isinstance(iterable, (Expr, core.FExpr)): - return core.count(iterable) - elif iterable is None: + if isinstance(iterable, (Expr, core.FExpr)) or (iterable is None): return core.count(iterable) - #return Expr(OpCodes.COUNT0, ()) else: return _builtin_sum((x is not None) for x in iterable) From 2a4b4f8f8ac9a9ac11688d01f712df8d45d5ab10 Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 10 Mar 2023 22:27:22 +1100 Subject: [PATCH 065/124] update docs links --- docs/api/dt/count.rst | 2 +- docs/api/dt/countna.rst | 2 +- src/core/column/countna.h | 3 +-- src/core/expr/{fexpr_countna.cc => fexpr_count_countna.cc} | 0 4 files changed, 3 insertions(+), 4 deletions(-) rename src/core/expr/{fexpr_countna.cc => fexpr_count_countna.cc} (100%) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index a21e38440..951a25bfb 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -1,6 +1,6 @@ .. xfunction:: datatable.count - :src: src/core/expr/head_reduce_unary.cc count_reducer + :src: src/core/expr/fexpr_count_countna.cc pyfn_count :cvar: doc_dt_count :tests: tests/test-reduce.py :signature: count(cols) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index ff8570516..fafbc0f1e 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -1,6 +1,6 @@ .. xfunction:: datatable.countna - :src: src/core/expr/fexpr_minmax.cc pyfn_countna + :src: src/core/expr/fexpr_count_countna.cc pyfn_countna :tests: tests/test-reduce.py :cvar: doc_dt_countna :signature: countna(cols) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 8b506875f..21d53b65e 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -46,8 +46,7 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { count = isvalid? static_cast(i1 - i0) : 0; } else { count = isvalid? 0: static_cast(i1 - i0); - } - + } *out = count; return true; } else { diff --git a/src/core/expr/fexpr_countna.cc b/src/core/expr/fexpr_count_countna.cc similarity index 100% rename from src/core/expr/fexpr_countna.cc rename to src/core/expr/fexpr_count_countna.cc From 45023228c880ee6abbdee033809e7b41d29c4018 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 11 Mar 2023 11:06:02 +1100 Subject: [PATCH 066/124] update code based on feedback --- src/core/column/countna.h | 10 +- src/core/expr/fexpr_count_countna.cc | 20 ++-- src/core/expr/head_reduce_unary.cc | 173 --------------------------- 3 files changed, 18 insertions(+), 185 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 21d53b65e..83e21bb42 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -25,7 +25,7 @@ namespace dt { -template +template class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -36,13 +36,13 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { this->gby_.get_group(i, &i0, &i1); int64_t count = 0; - if (COUNTT) { + if (COUNT_ALL_ROWS) { *out = static_cast(i1 - i0); return true; } else if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); - if (COUNT){ + if (COUNT_NOT_NULL){ count = isvalid? static_cast(i1 - i0) : 0; } else { count = isvalid? 0: static_cast(i1 - i0); @@ -52,7 +52,7 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { } else { for (size_t gi = i0; gi < i1; ++gi) { bool isvalid = this->col_.get_element(gi, &value); - count += COUNT? isvalid : !isvalid; + count += COUNT_NOT_NULL? isvalid : !isvalid; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index ccac5710a..2943d6f64 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -31,7 +31,7 @@ namespace dt { namespace expr { -template +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -41,9 +41,9 @@ class FExpr_CountNA : public FExpr_Func { : arg_(std::move(arg)) {} std::string repr() const override { - std::string out = COUNT? "count" : "countna"; + std::string out = COUNT_NOT_NULL? "count" : "countna"; out += '('; - if (!COUNTT) out += arg_->repr(); + if (!COUNT_ALL_ROWS) out += arg_->repr(); out += ')'; return out; } @@ -54,6 +54,12 @@ class FExpr_CountNA : public FExpr_Func { Workframe wf = arg_->evaluate_n(ctx); Groupby gby = ctx.get_groupby(); + if (!gby && COUNT_ALL_ROWS) { + int64_t nrows = static_cast(ctx.nrows()); + Column coli = Column(new ConstInt_ColumnImpl(1, nrows, SType::INT64)); + outputs.add_column(std::move(coli), "count", Grouping::GtoONE); + return outputs; + } if (!gby) { gby = Groupby::single_group(wf.nrows()); } @@ -65,7 +71,7 @@ class FExpr_CountNA : public FExpr_Func { ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - if (COUNTT) { + if (COUNT_ALL_ROWS) { outputs.add_column(std::move(coli), "count", Grouping::GtoONE); } else { outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); @@ -109,11 +115,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( std::move(col), gby ))); } diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 6b129f2ae..f7bceaf66 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -308,175 +308,6 @@ static Column compute_gsd(Column&& arg, const Groupby& gby) { -// //------------------------------------------------------------------------------ -// // count(A) -// //------------------------------------------------------------------------------ - -// template -// bool count_reducer(const Column& col, size_t i0, size_t i1, int64_t* out) { -// int64_t count = 0; -// for (size_t i = i0; i < i1; ++i) { -// T value; -// bool isvalid = col.get_element(i, &value); -// count += isvalid; -// } -// *out = count; -// return true; // *out is not NA -// } - - -// template -// static Column _count(Column&& arg, const Groupby& gby) { -// return Column( -// new Latent_ColumnImpl( -// new Reduced_ColumnImpl( -// SType::INT64, std::move(arg), gby, count_reducer -// ))); -// } - - -// static Column compute_count(Column&& arg, const Groupby& gby) { -// switch (arg.stype()) { -// case SType::VOID: return Column(new ConstInt_ColumnImpl( -// gby.size(), 0, SType::INT64 -// )); -// case SType::BOOL: -// case SType::INT8: return _count(std::move(arg), gby); -// case SType::INT16: return _count(std::move(arg), gby); -// case SType::DATE32: -// case SType::INT32: return _count(std::move(arg), gby); -// case SType::TIME64: -// case SType::INT64: return _count(std::move(arg), gby); -// case SType::FLOAT32: return _count(std::move(arg), gby); -// case SType::FLOAT64: return _count(std::move(arg), gby); -// case SType::STR32: -// case SType::STR64: return _count(std::move(arg), gby); -// default: throw _error("count", arg.stype()); -// } -// } - - -// //------------------------------------------------------------------------------ -// // countna -// //------------------------------------------------------------------------------ - -// template -// bool op_countna(const Column& col, size_t i0, size_t i1, int64_t* out) { -// int64_t count = 0; -// for (size_t i = i0; i < i1; ++i) { -// T value; -// bool isvalid = col.get_element(i, &value); -// count += !isvalid; -// } -// *out = count; -// return true; // *out is not NA -// } - - - -// template -// static Column _countna(Column&& arg, const Groupby& gby) { -// return Column( -// new Latent_ColumnImpl( -// new Reduced_ColumnImpl( -// SType::INT64, std::move(arg), gby, op_countna -// ))); -// } - -// static Column compute_countna(Column&& arg, const Groupby& gby) { -// switch (arg.stype()) { -// case SType::VOID: -// case SType::BOOL: -// case SType::INT8: return _countna(std::move(arg), gby); -// case SType::INT16: return _countna(std::move(arg), gby); -// case SType::DATE32: -// case SType::INT32: return _countna(std::move(arg), gby); -// case SType::TIME64: -// case SType::INT64: return _countna(std::move(arg), gby); -// case SType::FLOAT32: return _countna(std::move(arg), gby); -// case SType::FLOAT64: return _countna(std::move(arg), gby); -// case SType::STR32: -// case SType::STR64: return _countna(std::move(arg), gby); -// default: throw _error("countna", arg.stype()); -// } -// } - - -// //------------------------------------------------------------------------------ -// // count/countna(A:grouped) -// //------------------------------------------------------------------------------ - -// // T is the type of the input column -// template -// class CountGrouped_ColumnImpl : public Virtual_ColumnImpl -// { -// private: -// Column arg; -// Groupby groupby; - -// public: -// CountGrouped_ColumnImpl(Column&& col, const Groupby& grpby) -// : Virtual_ColumnImpl(grpby.size(), SType::INT64), -// arg(std::move(col)), -// groupby(grpby) {} - -// ColumnImpl* clone() const override { -// return new CountGrouped_ColumnImpl(Column(arg), groupby); -// } - -// bool get_element(size_t i, int64_t* out) const override { -// T value; -// bool isvalid = arg.get_element(i, &value); -// if (isvalid ^ NA) { -// size_t i0, i1; -// groupby.get_group(i, &i0, &i1); -// *out = static_cast(i1 - i0); -// } else { -// *out = 0; -// } -// return true; -// } - -// size_t n_children() const noexcept override { -// return 1; -// } - -// const Column& child(size_t i) const override { -// xassert(i == 0); (void)i; -// return arg; -// } - -// }; - - -// template -// static Column _gcount(Column&& arg, const Groupby& gby) { -// return Column(new CountGrouped_ColumnImpl(std::move(arg), gby)); -// } - -// template -// static Column compute_gcount(Column&& arg, const Groupby& gby) { -// switch (arg.stype()) { -// case SType::VOID: return Column(new ConstInt_ColumnImpl(1, 0, SType::INT64)); -// case SType::BOOL: -// case SType::INT8: return _gcount(std::move(arg), gby); -// case SType::INT16: return _gcount(std::move(arg), gby); -// case SType::DATE32: -// case SType::INT32: return _gcount(std::move(arg), gby); -// case SType::TIME64: -// case SType::INT64: return _gcount(std::move(arg), gby); -// case SType::FLOAT32: return _gcount(std::move(arg), gby); -// case SType::FLOAT64: return _gcount(std::move(arg), gby); -// case SType::STR32: -// case SType::STR64: return _gcount(std::move(arg), gby); -// default: throw _error("count", arg.stype()); -// } -// } - - - - - //------------------------------------------------------------------------------ // nunique(A:grouped) //------------------------------------------------------------------------------ @@ -713,8 +544,6 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_sd; break; case Op::FIRST: fn = compute_firstlast; break; case Op::LAST: fn = compute_firstlast; break; - //case Op::COUNT: fn = compute_count; break; - //case Op::COUNTNA:fn = compute_countna; break; case Op::MEDIAN: fn = compute_median; break; case Op::NUNIQUE:fn = compute_nunique; break; default: throw TypeError() << "Unknown reducer function: " @@ -725,8 +554,6 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_gsd; break; case Op::FIRST: case Op::LAST: fn = compute_gfirstlast; break; - //case Op::COUNT: fn = compute_gcount; break; - //case Op::COUNTNA:fn = compute_gcount; break; case Op::MEDIAN: fn = compute_gmedian; break; case Op::NUNIQUE:fn = compute_gnunique; break; default: throw TypeError() << "Unknown reducer function: " From fc4bd05480c4e268f2d6c7dfbc480809472e40d3 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 11 Mar 2023 11:40:01 +1100 Subject: [PATCH 067/124] cleanup --- src/core/expr/fexpr_count_countna.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 2943d6f64..d8da59dbc 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -28,6 +28,7 @@ #include "expr/workframe.h" #include "python/xargs.h" #include "stype.h" +#include namespace dt { namespace expr { @@ -68,23 +69,20 @@ class FExpr_CountNA : public FExpr_Func { bool is_grouped = ctx.has_group_column( wf.get_frame_id(i), wf.get_column_id(i) - ); - + ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); if (COUNT_ALL_ROWS) { outputs.add_column(std::move(coli), "count", Grouping::GtoONE); } else { outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } - } - + } return outputs; } Column evaluate1(Column &&col, const Groupby& gby, bool is_grouped) const { SType stype = col.stype(); - switch (stype) { case SType::VOID: case SType::BOOL: From 42a5cfda1e9db6291abffbf788f1d23d07cad17a Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 00:45:51 +1100 Subject: [PATCH 068/124] updates based on feedback --- src/core/column/count_all_rows.h | 72 ++++++++++++++++++++++++++++ src/core/column/countna.h | 21 ++++---- src/core/expr/fexpr_count_countna.cc | 43 ++++++++++------- 3 files changed, 105 insertions(+), 31 deletions(-) create mode 100644 src/core/column/count_all_rows.h diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h new file mode 100644 index 000000000..c23746c15 --- /dev/null +++ b/src/core/column/count_all_rows.h @@ -0,0 +1,72 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_COUNTALLROWS_h +#define dt_COLUMN_COUNTALLROWS_h +#include "column/virtual.h" +#include "parallel/api.h" +#include "stype.h" +namespace dt { + + +class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { + private: + Groupby gby_; + + public: + CountAllRows_ColumnImpl(const Groupby& gby) + : Virtual_ColumnImpl(gby.size(), SType::INT64), + gby_(gby) + {} + + + ColumnImpl* clone() const override { + return new CountAllRows_ColumnImpl(gby_); + } + + + size_t n_children() const noexcept override { + return 0; + } + + void materialize(Column &col_out, bool) override { + size_t nrows = gby_.size(); + const int32_t* offsets = gby_.offsets_r(); + Column col = Column::new_data_column(nrows, SType::INT64); + auto data = static_cast(col.get_data_editable()); + dt::parallel_for_dynamic(gby_.size(), + [&](size_t gi) { + for (size_t i = 0; i < nrows; ++i) { + data[i] = offsets[i + 1] - offsets[i]; + } + } + ); + + col_out = std::move(col); + } + +}; + + +} // namespace dt + + +#endif diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 83e21bb42..7752b0801 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,8 +25,8 @@ namespace dt { -template -class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; @@ -35,24 +35,19 @@ class CountNA_ColumnImpl : public ReduceUnary_ColumnImpl { size_t i0, i1; this->gby_.get_group(i, &i0, &i1); int64_t count = 0; - - if (COUNT_ALL_ROWS) { - *out = static_cast(i1 - i0); - return true; - } - else if (IS_GROUPED){ + if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); - if (COUNT_NOT_NULL){ - count = isvalid? static_cast(i1 - i0) : 0; + if (COUNTNA){ + count = isvalid? 0: static_cast(i1 - i0); } else { - count = isvalid? 0: static_cast(i1 - i0); + count = isvalid? static_cast(i1 - i0) : 0; } *out = count; - return true; + return true; // *out is not NA } else { for (size_t gi = i0; gi < i1; ++gi) { bool isvalid = this->col_.get_element(gi, &value); - count += COUNT_NOT_NULL? isvalid : !isvalid; + count += COUNTNA? !isvalid : isvalid; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index d8da59dbc..5ec8dda42 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -22,6 +22,7 @@ #include "column/const.h" #include "column/latent.h" #include "column/countna.h" +#include "column/count_all_rows.h" #include "documentation.h" #include "expr/fexpr_func.h" #include "expr/eval_context.h" @@ -32,7 +33,7 @@ namespace dt { namespace expr { -template +template class FExpr_CountNA : public FExpr_Func { private: ptrExpr arg_; @@ -42,9 +43,9 @@ class FExpr_CountNA : public FExpr_Func { : arg_(std::move(arg)) {} std::string repr() const override { - std::string out = COUNT_NOT_NULL? "count" : "countna"; + std::string out = COUNTNA? "countna" : "count"; out += '('; - if (!COUNT_ALL_ROWS) out += arg_->repr(); + if (arg_->get_expr_kind() != Kind::None) out += arg_->repr(); out += ')'; return out; } @@ -54,13 +55,26 @@ class FExpr_CountNA : public FExpr_Func { Workframe outputs(ctx); Workframe wf = arg_->evaluate_n(ctx); Groupby gby = ctx.get_groupby(); + // this covers a scenario where + // we dont care about the presence or absence of NAs + // we just want the total number of rows + bool count_all_rows = arg_->get_expr_kind() == Kind::None; + + if (count_all_rows && !gby) { + auto value = static_cast(ctx.nrows()); + Column coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + outputs.add_column(std::move(coli), "count", Grouping::GtoONE); + return outputs; + } - if (!gby && COUNT_ALL_ROWS) { - int64_t nrows = static_cast(ctx.nrows()); - Column coli = Column(new ConstInt_ColumnImpl(1, nrows, SType::INT64)); + if (count_all_rows && gby) { + Column coli = Column(new Latent_ColumnImpl( + new CountAllRows_ColumnImpl(gby) + )); outputs.add_column(std::move(coli), "count", Grouping::GtoONE); return outputs; } + if (!gby) { gby = Groupby::single_group(wf.nrows()); } @@ -71,11 +85,7 @@ class FExpr_CountNA : public FExpr_Func { wf.get_column_id(i) ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - if (COUNT_ALL_ROWS) { - outputs.add_column(std::move(coli), "count", Grouping::GtoONE); - } else { - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); - } + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; } @@ -113,11 +123,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new CountNA_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } @@ -126,15 +136,12 @@ class FExpr_CountNA : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { auto count = args[0].to_oobj_or_none(); - if (count.is_none()) { - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); - } - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); } static py::oobj pyfn_countna(const py::XArgs &args) { auto countna = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } From b85f1373dce9b660767842397ce7c14af9d779ce Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 01:07:16 +1100 Subject: [PATCH 069/124] 2022 -> 2023 copyright --- src/core/column/count_all_rows.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h index c23746c15..0022efd48 100644 --- a/src/core/column/count_all_rows.h +++ b/src/core/column/count_all_rows.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -26,7 +26,6 @@ #include "stype.h" namespace dt { - class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { private: Groupby gby_; From a151dcbc6d09e61e6d7e4add458d6608f2bbc3c0 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sun, 12 Mar 2023 01:52:42 +1100 Subject: [PATCH 070/124] Remove irrelevant header file --- src/core/expr/fexpr_count_countna.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 5ec8dda42..b7efca24f 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -29,7 +29,6 @@ #include "expr/workframe.h" #include "python/xargs.h" #include "stype.h" -#include namespace dt { namespace expr { From 33e5d01c5593eeee687039dcd4558ae5e0b55d26 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:41:00 +1100 Subject: [PATCH 071/124] code update with more shortcuts --- src/core/expr/fexpr_count_countna.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index b7efca24f..7c0c92c3c 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -82,9 +82,13 @@ class FExpr_CountNA : public FExpr_Func { bool is_grouped = ctx.has_group_column( wf.get_frame_id(i), wf.get_column_id(i) - ); - Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + ); + Column coli = wf.retrieve_column(i); + if (COUNTNA && !ctx.has_groupby() && (coli.stype() == SType::VOID)) { + int64_t nrows = static_cast(ctx.nrows()); + coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); + } else {coli = evaluate1(std::move(coli), gby, is_grouped);} + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; } From 8200141a35bc4d6219cba7af56cd55e877dd187b Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:43:16 +1100 Subject: [PATCH 072/124] remove irrelevant header files --- docs/api/dt/count.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 951a25bfb..90836703b 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -3,14 +3,14 @@ :src: src/core/expr/fexpr_count_countna.cc pyfn_count :cvar: doc_dt_count :tests: tests/test-reduce.py - :signature: count(cols) + :signature: count(cols=None) Calculate the number of non-missing values for each column from `cols`. Parameters ---------- cols: FExpr - Input columns. + Input columns. If no `cols` is passed, then the count of all rows is returned. return: Expr f-expression having one row, and the same names and number of columns From ce7e9cd7b945567306d447e249fa604edc8c5c70 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:56:04 +1100 Subject: [PATCH 073/124] code update based on feedback --- src/core/expr/fexpr_count_countna.cc | 19 +++++++------- src/core/expr/head_reduce_nullary.cc | 37 ---------------------------- 2 files changed, 9 insertions(+), 47 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 7c0c92c3c..e46c92482 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -54,22 +54,21 @@ class FExpr_CountNA : public FExpr_Func { Workframe outputs(ctx); Workframe wf = arg_->evaluate_n(ctx); Groupby gby = ctx.get_groupby(); - // this covers a scenario where + // this covers scenarios where // we dont care about the presence or absence of NAs // we just want the total number of rows bool count_all_rows = arg_->get_expr_kind() == Kind::None; - if (count_all_rows && !gby) { - auto value = static_cast(ctx.nrows()); - Column coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - outputs.add_column(std::move(coli), "count", Grouping::GtoONE); - return outputs; - } - - if (count_all_rows && gby) { - Column coli = Column(new Latent_ColumnImpl( + if (count_all_rows) { + Column coli; + if (gby){ + coli = Column(new Latent_ColumnImpl( new CountAllRows_ColumnImpl(gby) )); + } else { + auto value = static_cast(ctx.nrows()); + coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); return outputs; } diff --git a/src/core/expr/head_reduce_nullary.cc b/src/core/expr/head_reduce_nullary.cc index dd85c9662..2041fd048 100644 --- a/src/core/expr/head_reduce_nullary.cc +++ b/src/core/expr/head_reduce_nullary.cc @@ -31,53 +31,16 @@ namespace expr { -//------------------------------------------------------------------------------ -// count() -//------------------------------------------------------------------------------ - -static Column _count0(EvalContext& ctx) -{ - if (ctx.has_groupby()) { - // TODO: convert this into a virtual column - const Groupby& grpby = ctx.get_groupby(); - size_t ng = grpby.size(); - const int32_t* offsets = grpby.offsets_r(); - Column col = Column::new_data_column(ng, SType::INT64); - auto d_res = static_cast(col.get_data_editable()); - for (size_t i = 0; i < ng; ++i) { - d_res[i] = offsets[i + 1] - offsets[i]; - } - return col; - } - else { - auto value = static_cast(ctx.nrows()); - return Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } -} - - - //------------------------------------------------------------------------------ // Head_Reduce_Nullary //------------------------------------------------------------------------------ -static Workframe _wrap_column(EvalContext& ctx, Column&& col, std::string&& name) { - Workframe outputs(ctx); - outputs.add_column(std::move(col), std::move(name), Grouping::GtoONE); - return outputs; -} - - Workframe Head_Reduce_Nullary::evaluate_n( const vecExpr& args, EvalContext& ctx) const { xassert(args.size() == 0); (void) args; - switch (op) { - case Op::COUNT0: return _wrap_column(ctx, _count0(ctx), "count"); - default: break; - } throw RuntimeError() << "Unknown op " << static_cast(op) << " in Head_Reduce_Nullary"; } From b33d8ef84a0855df327825a3234c467004fdb6ec Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 11:58:51 +1100 Subject: [PATCH 074/124] add more details for count in docs --- docs/api/dt/count.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 90836703b..a69bb3863 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -5,7 +5,8 @@ :tests: tests/test-reduce.py :signature: count(cols=None) - Calculate the number of non-missing values for each column from `cols`. + Calculate the number of non-missing values for each column from `cols`, if `cols` is provided, + or the total number of rows if `cols` is not provided. Parameters ---------- From ccd27d45923f8443da7d2ecb8956b098f17b6c4d Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 12:05:28 +1100 Subject: [PATCH 075/124] countna must have an iterable --- src/core/expr/fexpr_count_countna.cc | 2 +- src/datatable/expr/reduce.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e46c92482..c9d8d0d98 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -59,7 +59,7 @@ class FExpr_CountNA : public FExpr_Func { // we just want the total number of rows bool count_all_rows = arg_->get_expr_kind() == Kind::None; - if (count_all_rows) { + if (count_all_rows && !COUNTNA) { Column coli; if (gby){ coli = Column(new Latent_ColumnImpl( diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index e094654a6..163790871 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -54,7 +54,7 @@ def nunique(iterable=None): return Expr(OpCodes.NUNIQUE, (iterable,)) -def countna(iterable=None): +def countna(iterable): return core.countna(iterable) From 6b011487d2c8ea63d54075556b23775bf4b13cb9 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 21:26:17 +1100 Subject: [PATCH 076/124] add test for void countna --- tests/dt/test-countna.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index f8660f41d..10e9ff8f6 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -74,3 +74,9 @@ def test_dt_count_na2(): EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) RES = DT[:, [dt.countna(f.V), dt.countna(dt.mean(f.V))], dt.by(f.G)] assert EXP.to_list() == RES.to_list() + + +def test_dt_countna_void(): + DT = dt.Frame([None]) + RES = DT[:, dt.countna(f.C0), dt.by(f.C0)] + EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) \ No newline at end of file From 069a48899aa2aa16a31965e0b1d72152f133a4c1 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 09:27:05 +1100 Subject: [PATCH 077/124] update based on feedback --- src/core/expr/fexpr_count_countna.cc | 20 ++++++++++---------- src/datatable/expr/reduce.py | 11 ++++++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index c9d8d0d98..39faae5ea 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -99,22 +99,22 @@ class FExpr_CountNA : public FExpr_Func { case SType::VOID: case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::DATE32: case SType::INT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::TIME64: case SType::INT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -122,14 +122,14 @@ class FExpr_CountNA : public FExpr_Func { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 163790871..a9a5070a0 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -44,10 +44,15 @@ def count(iterable=None): - if isinstance(iterable, (Expr, core.FExpr)) or (iterable is None): + if iterable is None: return core.count(iterable) - else: - return _builtin_sum((x is not None) for x in iterable) + if (not isinstance(iterable, dict) + and (isinstance(iterable, core.FExpr) + or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): + return core.count(iterable) + if isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): + return core.count(iterable) + return _builtin_sum((x is not None) for x in iterable) def nunique(iterable=None): From 957fcdf5d15dfcb94d74af4ee91159e2ddd32920 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 09:30:10 +1100 Subject: [PATCH 078/124] defensive steps for empty value --- src/datatable/expr/reduce.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a9a5070a0..54f715283 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -110,7 +110,7 @@ def corr(col1, col2): def sum(iterable, start=0): if (not isinstance(iterable, dict) and (isinstance(iterable, core.FExpr) - or (hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): + or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): return core.sum(iterable) elif isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): return core.sum(iterable) @@ -123,7 +123,7 @@ def sum(iterable, start=0): def min(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.min(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.min(args) @@ -137,7 +137,7 @@ def min(*args, **kwds): def max(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.max(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.max(args) From fafb0957142bd265e5f905c5b8dfb54ae5e599a1 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 10:20:11 +1100 Subject: [PATCH 079/124] update based on feedback --- src/core/column/countna.h | 10 +++++----- src/core/expr/fexpr_count_countna.cc | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 7752b0801..1de6205ba 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,12 +25,12 @@ namespace dt { -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, U* out) const override { + bool get_element(size_t i, int64_t* out) const override { T value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); @@ -38,9 +38,9 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); if (COUNTNA){ - count = isvalid? 0: static_cast(i1 - i0); + count = isvalid? 0: static_cast(i1 - i0); } else { - count = isvalid? static_cast(i1 - i0) : 0; + count = isvalid? static_cast(i1 - i0) : 0; } *out = count; return true; // *out is not NA diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 39faae5ea..b94f4c119 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -125,11 +125,11 @@ class FExpr_CountNA : public FExpr_Func { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } From cea8e29d9fe50b9a296d1963f9259cc819b6b7ea Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:42:42 +1100 Subject: [PATCH 080/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index b94f4c119..e9c7b6475 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -86,7 +86,9 @@ class FExpr_CountNA : public FExpr_Func { if (COUNTNA && !ctx.has_groupby() && (coli.stype() == SType::VOID)) { int64_t nrows = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); - } else {coli = evaluate1(std::move(coli), gby, is_grouped);} + } else { + coli = evaluate1(std::move(coli), gby, is_grouped); + } outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; From 657beb35e34414c2dbf78bb9aab2bbd3078beea1 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:43:43 +1100 Subject: [PATCH 081/124] Update src/core/column/countna.h Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/column/countna.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 1de6205ba..0fd88d036 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -41,7 +41,7 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { count = isvalid? 0: static_cast(i1 - i0); } else { count = isvalid? static_cast(i1 - i0) : 0; - } + } *out = count; return true; // *out is not NA } else { From b0a3595465490d1269134339b2f9da4a7fecccb4 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:43:51 +1100 Subject: [PATCH 082/124] Update src/core/column/countna.h Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/column/countna.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 0fd88d036..821118ea0 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -38,7 +38,7 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { if (IS_GROUPED){ bool isvalid = this->col_.get_element(i, &value); if (COUNTNA){ - count = isvalid? 0: static_cast(i1 - i0); + count = isvalid? 0 : static_cast(i1 - i0); } else { count = isvalid? static_cast(i1 - i0) : 0; } From f357edd760c32791568a7b4e8d384bc51b54b8d7 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:44:19 +1100 Subject: [PATCH 083/124] Update src/core/column/countna.h Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/column/countna.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 821118ea0..f84afee1a 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -45,13 +45,13 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { *out = count; return true; // *out is not NA } else { - for (size_t gi = i0; gi < i1; ++gi) { - bool isvalid = this->col_.get_element(gi, &value); - count += COUNTNA? !isvalid : isvalid; - } - *out = count; - return true; // *out is not NA + for (size_t gi = i0; gi < i1; ++gi) { + bool isvalid = this->col_.get_element(gi, &value); + count += COUNTNA? !isvalid : isvalid; } + *out = count; + return true; // *out is not NA + } } }; From cf15fbe63e1d88cf5a46ac9dfae53a76c44b7b21 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:44:35 +1100 Subject: [PATCH 084/124] Update docs/api/dt/count.rst Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/count.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index a69bb3863..f791554a0 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -5,8 +5,8 @@ :tests: tests/test-reduce.py :signature: count(cols=None) - Calculate the number of non-missing values for each column from `cols`, if `cols` is provided, - or the total number of rows if `cols` is not provided. + Calculate the number of non-missing values for each column from `cols`. When `cols` is not provided, + calculate the total number of rows. Parameters ---------- From 8fbeabfcff84f5a2153e0111c0d15db344b56fe1 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 14 Mar 2023 16:45:40 +1100 Subject: [PATCH 085/124] Update docs/api/dt/count.rst Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/count.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index f791554a0..e57af256d 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -11,7 +11,7 @@ Parameters ---------- cols: FExpr - Input columns. If no `cols` is passed, then the count of all rows is returned. + Input columns if any. return: Expr f-expression having one row, and the same names and number of columns From 729591bd4e3ca8f03964c9f20815d72617d37fd8 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 17:21:39 +1100 Subject: [PATCH 086/124] more descriptive template variables --- src/core/column/countna.h | 8 ++++---- src/core/column/minmax.h | 8 ++++---- src/core/column/reduce_unary.h | 6 +++--- src/core/column/sumprod.h | 8 ++++---- src/core/expr/fexpr_count_countna.cc | 6 +++--- src/core/expr/fexpr_mean.cc | 6 +++--- src/core/expr/fexpr_sumprod.cc | 6 +++--- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index f84afee1a..5c9bba6bb 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,13 +25,13 @@ namespace dt { -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, int64_t* out) const override { - T value; + T_IN value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); int64_t count = 0; diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 95fbd0599..a339e67ef 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -35,14 +35,14 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { // initially being set to `true`. So the default value here // only silences the compiler warning and makes the update // to happen a little bit faster, but it has no effect on the final result. - T res = MIN ? std::numeric_limits::max() - : std::numeric_limits::min(); + T_IN res = MIN ? std::numeric_limits::max() + : std::numeric_limits::min(); bool res_isna = true; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); for (size_t gi = i0; gi < i1; ++gi) { - T value; + T_IN value; bool isvalid = this->col_.get_element(gi, &value); if (MIN) { if (isvalid && (value < res || res_isna)) { @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 3eeb39255..1dbe95552 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -26,7 +26,7 @@ namespace dt { -template +template class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; @@ -35,11 +35,11 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { public: ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), stype_from), + : Virtual_ColumnImpl(gby.size(), stype_from), col_(std::move(col)), gby_(gby) { - xassert(col_.can_be_read_as()); + xassert(col_.can_be_read_as()); } diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index 10bb3a074..c461996b6 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -31,9 +31,9 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, U* out) const override { - T result = !SUM; // 0 for `sum()` and 1 for `prod()` - T value; + bool get_element(size_t i, T_OUT* out) const override { + T_IN result = !SUM; // 0 for `sum()` and 1 for `prod()` + T_IN value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); @@ -41,7 +41,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { size_t nrows = i1 - i0; bool is_valid = this->col_.get_element(i, &value); if (is_valid){ - result = SUM? static_cast(nrows) * value + result = SUM? static_cast(nrows) * value : ipow(value, nrows); } } else { diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e9c7b6475..5bbab5d7a 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -124,14 +124,14 @@ class FExpr_CountNA : public FExpr_Func { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), gby ))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 162417367..1b9b360ea 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -85,14 +85,14 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), gby ))); } diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 4daf57b84..4c0a8c911 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -67,14 +67,14 @@ class FExpr_SumProd : public FExpr_ReduceUnary { } - template + template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } From 6fa2664c929af55d4b1522f0ca5a85e5d0408ab9 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 17:28:43 +1100 Subject: [PATCH 087/124] single template type for min/max/sum/prod --- src/core/column/minmax.h | 2 +- src/core/column/sumprod.h | 2 +- src/core/expr/fexpr_sumprod.cc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index a339e67ef..93dce5fe1 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index c461996b6..4055e4e2e 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -31,7 +31,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, T_OUT* out) const override { + bool get_element(size_t i, T_IN* out) const override { T_IN result = !SUM; // 0 for `sum()` and 1 for `prod()` T_IN value; size_t i0, i1; diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 4c0a8c911..e452f8da2 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -70,11 +70,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby ))); } From ef9553ae013685d371491b928a2ff1feebda98fb Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 20:28:42 +1100 Subject: [PATCH 088/124] count all rows use unary impl --- src/core/column/count_all_rows.h | 46 +++++----------------------- src/core/expr/fexpr_count_countna.cc | 11 +++---- tests/dt/test-countna.py | 3 +- 3 files changed, 14 insertions(+), 46 deletions(-) diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h index 0022efd48..6831b6338 100644 --- a/src/core/column/count_all_rows.h +++ b/src/core/column/count_all_rows.h @@ -21,51 +21,21 @@ //------------------------------------------------------------------------------ #ifndef dt_COLUMN_COUNTALLROWS_h #define dt_COLUMN_COUNTALLROWS_h -#include "column/virtual.h" -#include "parallel/api.h" -#include "stype.h" +#include "column/reduce_unary.h" namespace dt { -class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { - private: - Groupby gby_; +class CountAllRows_ColumnImpl : public ReduceUnary_ColumnImpl { public: - CountAllRows_ColumnImpl(const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), SType::INT64), - gby_(gby) - {} + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - - ColumnImpl* clone() const override { - return new CountAllRows_ColumnImpl(gby_); - } - - - size_t n_children() const noexcept override { - return 0; + bool get_element(size_t i, int64_t* out) const override { + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + *out = static_cast(i1 - i0); + return true; } - - void materialize(Column &col_out, bool) override { - size_t nrows = gby_.size(); - const int32_t* offsets = gby_.offsets_r(); - Column col = Column::new_data_column(nrows, SType::INT64); - auto data = static_cast(col.get_data_editable()); - dt::parallel_for_dynamic(gby_.size(), - [&](size_t gi) { - for (size_t i = 0; i < nrows; ++i) { - data[i] = offsets[i + 1] - offsets[i]; - } - } - ); - - col_out = std::move(col); - } - }; - } // namespace dt - - #endif diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 5bbab5d7a..bf4f50cc7 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -62,10 +62,8 @@ class FExpr_CountNA : public FExpr_Func { if (count_all_rows && !COUNTNA) { Column coli; if (gby){ - coli = Column(new Latent_ColumnImpl( - new CountAllRows_ColumnImpl(gby) - )); - } else { + coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(wf.retrieve_column(0), gby))); + } else{ auto value = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } @@ -87,8 +85,8 @@ class FExpr_CountNA : public FExpr_Func { int64_t nrows = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); } else { - coli = evaluate1(std::move(coli), gby, is_grouped); - } + coli = evaluate1(std::move(coli), gby, is_grouped); + } outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; @@ -123,7 +121,6 @@ class FExpr_CountNA : public FExpr_Func { } } - template Column make(Column &&col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 10e9ff8f6..a5235a610 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -79,4 +79,5 @@ def test_dt_count_na2(): def test_dt_countna_void(): DT = dt.Frame([None]) RES = DT[:, dt.countna(f.C0), dt.by(f.C0)] - EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) \ No newline at end of file + EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) + From ab846a4758b6b21bb4be0a76da2f94d512bba9b2 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 20:51:20 +1100 Subject: [PATCH 089/124] create dummy column for count --- src/core/expr/fexpr_count_countna.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index bf4f50cc7..d158b1779 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -61,10 +61,12 @@ class FExpr_CountNA : public FExpr_Func { if (count_all_rows && !COUNTNA) { Column coli; + auto value = static_cast(ctx.nrows()); if (gby){ - coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(wf.retrieve_column(0), gby))); + coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); + coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); } else{ - auto value = static_cast(ctx.nrows()); + coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); From 6499d23303d9c693947511c04406c573de2ab1e3 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 14 Mar 2023 20:51:45 +1100 Subject: [PATCH 090/124] remove whitespace --- src/core/expr/fexpr_count_countna.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index d158b1779..979b083d5 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -65,8 +65,7 @@ class FExpr_CountNA : public FExpr_Func { if (gby){ coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); - } else{ - + } else{ coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); From 7c3dce07acb8b1772b49e295a0130c8d5d479aa8 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 09:22:36 +1100 Subject: [PATCH 091/124] add countna when cols is None --- src/core/expr/fexpr_count_countna.cc | 6 ++++++ src/datatable/expr/reduce.py | 2 +- tests/dt/test-countna.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 979b083d5..e7a802036 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -76,6 +76,12 @@ class FExpr_CountNA : public FExpr_Func { gby = Groupby::single_group(wf.nrows()); } + if (count_all_rows && COUNTNA){ + Column coli = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); + outputs.add_column(std::move(coli), std::string(), Grouping::GtoONE); + return outputs; + } + for (size_t i = 0; i < wf.ncols(); ++i) { bool is_grouped = ctx.has_group_column( wf.get_frame_id(i), diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 54f715283..0c9d0fa21 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -59,7 +59,7 @@ def nunique(iterable=None): return Expr(OpCodes.NUNIQUE, (iterable,)) -def countna(iterable): +def countna(iterable=None): return core.countna(iterable) diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index a5235a610..79148ebcb 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -81,3 +81,14 @@ def test_dt_countna_void(): RES = DT[:, dt.countna(f.C0), dt.by(f.C0)] EXP = dt.Frame({"C0":[None], "C1":[1]/dt.int64}) +def test_dt_countna_None_by(): + DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) + EXP = dt.Frame(G=[1,2], C0=[0,0]) + RES = DT[:, dt.countna(), f.G] + assert EXP.to_list() == RES.to_list() + +def test_dt_countna_None(): + DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) + EXP = dt.Frame(C0=[0]) + RES = DT[:, dt.countna()] + assert EXP.to_list() == RES.to_list() \ No newline at end of file From 826fadfca6f64ce85246265761f42a7259e51531 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 09:26:08 +1100 Subject: [PATCH 092/124] add more details for count and countna --- docs/api/dt/count.rst | 2 ++ docs/api/dt/countna.rst | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index e57af256d..58ab320a4 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -16,6 +16,8 @@ return: Expr f-expression having one row, and the same names and number of columns as in `cols`. All the returned column stypes are `int64`. + If `cols` is not provided, the total number of rows + (a combination of the count of missing and non-missing values) is returned. except: TypeError The exception is raised when one of the columns from `cols` diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index fafbc0f1e..932fbab5d 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -7,7 +7,7 @@ .. x-version-added:: 1.1.0 - Count the number of NA values for each column from `cols`. + Count the number of NA values for each column from `cols`. Parameters ---------- @@ -17,6 +17,7 @@ return: Expr f-expression having one row, and the same names and number of columns as in `cols`. All the returned column stypes are `int64`. + If `cols` is not provided, 0 is returned per group. except: TypeError The exception is raised when one of the columns from `cols` From e6d188b783cafdf4d4ab67efd022459963d15588 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 20:59:23 +1100 Subject: [PATCH 093/124] update countna --- docs/api/dt/count.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 58ab320a4..d8ec3b7d6 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -68,4 +68,4 @@ See Also -------- - - :func:`sum()` -- function to calculate the sum of values. + - :func:`counta()` -- function to count the number of missing values. From 82fdf49d20fdee99fc120e1f55ebc6af705a6465 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 15 Mar 2023 22:06:22 +1100 Subject: [PATCH 094/124] fix countna docs link --- docs/api/dt/count.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index d8ec3b7d6..a62f60537 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -68,4 +68,4 @@ See Also -------- - - :func:`counta()` -- function to count the number of missing values. + - :func:`countna()` -- function to count the number of missing values. From 1555b5e429fab8cdc87b14ee61b080104bfe898b Mon Sep 17 00:00:00 2001 From: samukweku Date: Thu, 16 Mar 2023 09:10:25 +1100 Subject: [PATCH 095/124] update counta logic for Frame vs FExpr --- src/datatable/expr/reduce.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 0c9d0fa21..b9ffd884b 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -60,6 +60,8 @@ def nunique(iterable=None): def countna(iterable=None): + if isinstance(iterable, core.Frame): + return iterable.countna() return core.countna(iterable) From 20626b2f26a1c55b7fb61ff7c82defbea9ad314c Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Fri, 17 Mar 2023 09:56:10 +1100 Subject: [PATCH 096/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e7a802036..02ad8e6ff 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -92,8 +92,8 @@ class FExpr_CountNA : public FExpr_Func { int64_t nrows = static_cast(ctx.nrows()); coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); } else { - coli = evaluate1(std::move(coli), gby, is_grouped); - } + coli = evaluate1(std::move(coli), gby, is_grouped); + } outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } return outputs; From 56fef37c4f97dba82a89ef1d7dff1c79b2d479b0 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Fri, 17 Mar 2023 09:56:18 +1100 Subject: [PATCH 097/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 02ad8e6ff..4402d9f15 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -66,8 +66,8 @@ class FExpr_CountNA : public FExpr_Func { coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); } else{ - coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } + coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } outputs.add_column(std::move(coli), "count", Grouping::GtoONE); return outputs; } From 3bca52939790f9e3a5ec70bf0c713191d17f1f8a Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 17 Mar 2023 21:20:14 +1100 Subject: [PATCH 098/124] update count_all_rows to avoid dummy col creation --- src/core/column/count_all_rows.h | 6 ++-- src/core/column/reduce_nullary.h | 53 ++++++++++++++++++++++++++++ src/core/expr/fexpr_count_countna.cc | 3 +- 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 src/core/column/reduce_nullary.h diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h index 6831b6338..de0a1a334 100644 --- a/src/core/column/count_all_rows.h +++ b/src/core/column/count_all_rows.h @@ -21,13 +21,13 @@ //------------------------------------------------------------------------------ #ifndef dt_COLUMN_COUNTALLROWS_h #define dt_COLUMN_COUNTALLROWS_h -#include "column/reduce_unary.h" +#include "column/reduce_nullary.h" namespace dt { -class CountAllRows_ColumnImpl : public ReduceUnary_ColumnImpl { +class CountAllRows_ColumnImpl : public ReduceNullary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceNullary_ColumnImpl::ReduceNullary_ColumnImpl; bool get_element(size_t i, int64_t* out) const override { size_t i0, i1; diff --git a/src/core/column/reduce_nullary.h b/src/core/column/reduce_nullary.h new file mode 100644 index 000000000..1019b3595 --- /dev/null +++ b/src/core/column/reduce_nullary.h @@ -0,0 +1,53 @@ +//------------------------------------------------------------------------------ +// Copyright 2023 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_REDUCE_NULLARY_h +#define dt_COLUMN_REDUCE_NULLARY_h +#include "column/virtual.h" +#include "stype.h" +namespace dt { + + +class ReduceNullary_ColumnImpl : public Virtual_ColumnImpl { + protected: + Groupby gby_; + + + public: + ReduceNullary_ColumnImpl(const Groupby& gby, SType stype) + : Virtual_ColumnImpl(gby.size(), stype), + gby_(gby) + {} + + + ColumnImpl *clone() const override { + return new ReduceNullary_ColumnImpl(Groupby(gby_), this->stype()); + } + + size_t n_children() const noexcept override { + return 0; + } + +}; + + +} // namespace dt +#endif diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 4402d9f15..f987d4583 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -63,8 +63,7 @@ class FExpr_CountNA : public FExpr_Func { Column coli; auto value = static_cast(ctx.nrows()); if (gby){ - coli = Const_ColumnImpl::make_int_column(value, 1, SType::INT64); - coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(std::move(coli), gby))); + coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby, SType::INT64))); } else{ coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } From 7da25f236d4daca27c8597edf55c1a371da6be9e Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 18 Mar 2023 12:48:20 +1100 Subject: [PATCH 099/124] cast inplace for sumprod --- src/core/expr/fexpr_sumprod.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index e452f8da2..814a36dd5 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,6 +55,7 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: + col.cast_inplace(SType::INT64); return make(std::move(col), gby, is_grouped); case SType::FLOAT32: return make(std::move(col), gby, is_grouped); From 2d579f2b217e117a2a5975f64d83cc56b5c25ec5 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 18 Mar 2023 15:53:15 +1100 Subject: [PATCH 100/124] restore cast in place for sumprod --- src/core/expr/fexpr_sumprod.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 814a36dd5..4e985059c 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,12 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - col.cast_inplace(SType::INT64); - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -69,7 +68,8 @@ class FExpr_SumProd : public FExpr_ReduceUnary { template - Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), gby From 50d1fad59b53f53cc43681bd9190194de98d8fc9 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 19 Mar 2023 22:12:28 +1100 Subject: [PATCH 101/124] add explicit stype to reduce_unary.h --- src/core/column/reduce_unary.h | 6 +++--- src/core/expr/fexpr_count_countna.cc | 20 ++++++++++---------- src/core/expr/fexpr_mean.cc | 25 +++++++++++-------------- src/core/expr/fexpr_minmax.cc | 4 ++-- src/core/expr/fexpr_sumprod.cc | 4 ++-- tests/dt/test-countna.py | 2 +- 6 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 1dbe95552..a9f966b69 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -34,8 +34,8 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { public: - ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), stype_from), + ReduceUnary_ColumnImpl(Column &&col, SType stype, const Groupby& gby) + : Virtual_ColumnImpl(gby.size(), stype), col_(std::move(col)), gby_(gby) { @@ -44,7 +44,7 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { ColumnImpl *clone() const override { - return new ReduceUnary_ColumnImpl(Column(col_), Groupby(gby_)); + return new ReduceUnary_ColumnImpl(Column(col_), this->stype(), Groupby(gby_)); } diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index f987d4583..eadbe99cf 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -105,22 +105,22 @@ class FExpr_CountNA : public FExpr_Func { case SType::VOID: case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::DATE32: case SType::INT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::TIME64: case SType::INT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -128,14 +128,14 @@ class FExpr_CountNA : public FExpr_Func { } template - Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { if (is_grouped) { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } else { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 1b9b360ea..9c24af174 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -52,24 +52,20 @@ class FExpr_Mean : public FExpr_ReduceUnary { )); case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); case SType::INT32: - return make(std::move(col), gby, is_grouped); case SType::INT64: - return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::DATE32: { - Column coli = make(std::move(col), gby, is_grouped); + Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); coli.cast_inplace(SType::DATE32); return coli; } case SType::TIME64: { - Column coli = make(std::move(col), gby, is_grouped); + Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); coli.cast_inplace(SType::TIME64); return coli; } @@ -85,15 +81,16 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template - Column make(Column &&col, const Groupby& gby, bool is_grouped) const { + template + Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + col.cast_inplace(stype); if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), gby + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), gby + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), stype, gby ))); } } diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 3a5faf5f6..9fd81fa35 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -52,7 +52,7 @@ class FExpr_MinMax : public FExpr_ReduceUnary { return Column(new ConstNa_ColumnImpl(gby.size(), stype)); case SType::BOOL: case SType::INT8: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT8, gby, is_grouped); case SType::INT16: return make(std::move(col), gby, is_grouped); case SType::INT32: @@ -62,7 +62,7 @@ class FExpr_MinMax : public FExpr_ReduceUnary { case SType::TIME64: return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: return make(std::move(col), gby, is_grouped); default: diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 4e985059c..5284e1fc6 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } } diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 79148ebcb..4b085e35e 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -68,7 +68,7 @@ def test_dt_count_na1(src): RES = df[:, dt.countna(f[:])] assert_equals(EXP, RES) - +@pytest.mark.xfail(reason="commented out till #3417 is resolved.") def test_dt_count_na2(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) From 9d291548bbc72e160c2e71f92a18e16932de4c2c Mon Sep 17 00:00:00 2001 From: samukweku Date: Mon, 20 Mar 2023 09:27:26 +1100 Subject: [PATCH 102/124] rename template parameter for single type --- src/core/column/minmax.h | 8 ++++---- src/core/expr/fexpr_mean.cc | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 93dce5fe1..9bfdb2905 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -35,14 +35,14 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { // initially being set to `true`. So the default value here // only silences the compiler warning and makes the update // to happen a little bit faster, but it has no effect on the final result. - T_IN res = MIN ? std::numeric_limits::max() - : std::numeric_limits::min(); + T res = MIN ? std::numeric_limits::max() + : std::numeric_limits::min(); bool res_isna = true; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); for (size_t gi = i0; gi < i1; ++gi) { - T_IN value; + T value; bool isvalid = this->col_.get_element(gi, &value); if (MIN) { if (isvalid && (value < res || res_isna)) { @@ -57,7 +57,7 @@ class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { } } - *out = static_cast(res); + *out = static_cast(res); return !res_isna; } }; diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 9c24af174..22d1c3820 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -81,15 +81,15 @@ class FExpr_Mean : public FExpr_ReduceUnary { } - template + template Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( std::move(col), stype, gby ))); } From e872a44ccbfeffcb3a4ba8c1119802bb641d1dff Mon Sep 17 00:00:00 2001 From: samukweku Date: Fri, 24 Mar 2023 23:15:45 +1100 Subject: [PATCH 103/124] update code to use FExpr_ReduceUnary --- src/core/column/count_all_rows.h | 41 ------ src/core/column/countna.h | 8 +- .../{reduce_nullary.h => countna_no_args.h} | 24 ++-- src/core/column/mean.h | 6 +- src/core/column/minmax.h | 4 +- src/core/column/reduce_unary.h | 2 +- src/core/column/sumprod.h | 12 +- src/core/expr/fexpr_count_countna.cc | 134 ++++++++---------- src/core/expr/fexpr_mean.cc | 38 ++--- src/core/expr/fexpr_minmax.cc | 14 +- src/core/expr/fexpr_sumprod.cc | 10 +- src/datatable/expr/reduce.py | 2 +- 12 files changed, 122 insertions(+), 173 deletions(-) delete mode 100644 src/core/column/count_all_rows.h rename src/core/column/{reduce_nullary.h => countna_no_args.h} (74%) diff --git a/src/core/column/count_all_rows.h b/src/core/column/count_all_rows.h deleted file mode 100644 index de0a1a334..000000000 --- a/src/core/column/count_all_rows.h +++ /dev/null @@ -1,41 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#ifndef dt_COLUMN_COUNTALLROWS_h -#define dt_COLUMN_COUNTALLROWS_h -#include "column/reduce_nullary.h" -namespace dt { - - -class CountAllRows_ColumnImpl : public ReduceNullary_ColumnImpl { - public: - using ReduceNullary_ColumnImpl::ReduceNullary_ColumnImpl; - - bool get_element(size_t i, int64_t* out) const override { - size_t i0, i1; - this->gby_.get_group(i, &i0, &i1); - *out = static_cast(i1 - i0); - return true; - } -}; - -} // namespace dt -#endif diff --git a/src/core/column/countna.h b/src/core/column/countna.h index 5c9bba6bb..c836866a7 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -25,13 +25,13 @@ namespace dt { -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { +template +class Count_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, int64_t* out) const override { - T_IN value; + T value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); int64_t count = 0; diff --git a/src/core/column/reduce_nullary.h b/src/core/column/countna_no_args.h similarity index 74% rename from src/core/column/reduce_nullary.h rename to src/core/column/countna_no_args.h index 1019b3595..0e6b1b405 100644 --- a/src/core/column/reduce_nullary.h +++ b/src/core/column/countna_no_args.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -19,35 +19,43 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS // IN THE SOFTWARE. //------------------------------------------------------------------------------ -#ifndef dt_COLUMN_REDUCE_NULLARY_h -#define dt_COLUMN_REDUCE_NULLARY_h +#ifndef dt_COLUMN_COUNTNA_ALLROWS_h +#define dt_COLUMN_COUNTNA_ALLROWS_h #include "column/virtual.h" #include "stype.h" namespace dt { -class ReduceNullary_ColumnImpl : public Virtual_ColumnImpl { +class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { protected: Groupby gby_; - public: - ReduceNullary_ColumnImpl(const Groupby& gby, SType stype) - : Virtual_ColumnImpl(gby.size(), stype), + CountAllRows_ColumnImpl(const Groupby& gby) + : Virtual_ColumnImpl(gby.size(), SType::INT64), gby_(gby) {} ColumnImpl *clone() const override { - return new ReduceNullary_ColumnImpl(Groupby(gby_), this->stype()); + return new CountAllRows_ColumnImpl(Groupby(gby_)); } size_t n_children() const noexcept override { return 0; } + + bool get_element(size_t i, int64_t* out) const override { + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + *out = static_cast(i1 - i0); + return true; + } }; + + } // namespace dt #endif diff --git a/src/core/column/mean.h b/src/core/column/mean.h index e3863ca95..f89b52d10 100644 --- a/src/core/column/mean.h +++ b/src/core/column/mean.h @@ -26,9 +26,9 @@ namespace dt { template -class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { +class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, T* out) const override { T value; @@ -53,4 +53,4 @@ class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt -#endif +#endif \ No newline at end of file diff --git a/src/core/column/minmax.h b/src/core/column/minmax.h index 9bfdb2905..8a8b5a5db 100644 --- a/src/core/column/minmax.h +++ b/src/core/column/minmax.h @@ -26,9 +26,9 @@ namespace dt { template -class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { +class MinMax_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; bool get_element(size_t i, T* out) const override { // res` will be updated on the first valid element, due to `res_isna` diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index a9f966b69..ec58ba745 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -26,7 +26,7 @@ namespace dt { -template +template class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; diff --git a/src/core/column/sumprod.h b/src/core/column/sumprod.h index 4055e4e2e..05c2e7423 100644 --- a/src/core/column/sumprod.h +++ b/src/core/column/sumprod.h @@ -27,13 +27,13 @@ namespace dt { template -class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { +class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - bool get_element(size_t i, T_IN* out) const override { - T_IN result = !SUM; // 0 for `sum()` and 1 for `prod()` - T_IN value; + bool get_element(size_t i, T* out) const override { + T result = !SUM; // 0 for `sum()` and 1 for `prod()` + T value; size_t i0, i1; this->gby_.get_group(i, &i0, &i1); @@ -41,7 +41,7 @@ class SumProd_ColumnImpl : public ReduceUnary_ColumnImpl { size_t nrows = i1 - i0; bool is_valid = this->col_.get_element(i, &value); if (is_valid){ - result = SUM? static_cast(nrows) * value + result = SUM? static_cast(nrows) * value : ipow(value, nrows); } } else { diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index eadbe99cf..b6e2129b5 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai +// Copyright 2022-2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -22,9 +22,9 @@ #include "column/const.h" #include "column/latent.h" #include "column/countna.h" -#include "column/count_all_rows.h" +#include "column/countna_no_args.h" #include "documentation.h" -#include "expr/fexpr_func.h" +#include "expr/fexpr_reduce_unary.h" #include "expr/eval_context.h" #include "expr/workframe.h" #include "python/xargs.h" @@ -33,73 +33,17 @@ namespace dt { namespace expr { template -class FExpr_CountNA : public FExpr_Func { - private: - ptrExpr arg_; - +class FExpr_CountNA : public FExpr_ReduceUnary { public: - FExpr_CountNA(ptrExpr &&arg) - : arg_(std::move(arg)) {} - - std::string repr() const override { - std::string out = COUNTNA? "countna" : "count"; - out += '('; - if (arg_->get_expr_kind() != Kind::None) out += arg_->repr(); - out += ')'; - return out; - } - - - Workframe evaluate_n(EvalContext &ctx) const override { - Workframe outputs(ctx); - Workframe wf = arg_->evaluate_n(ctx); - Groupby gby = ctx.get_groupby(); - // this covers scenarios where - // we dont care about the presence or absence of NAs - // we just want the total number of rows - bool count_all_rows = arg_->get_expr_kind() == Kind::None; - - if (count_all_rows && !COUNTNA) { - Column coli; - auto value = static_cast(ctx.nrows()); - if (gby){ - coli = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby, SType::INT64))); - } else{ - coli = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } - outputs.add_column(std::move(coli), "count", Grouping::GtoONE); - return outputs; - } + using FExpr_ReduceUnary::FExpr_ReduceUnary; - if (!gby) { - gby = Groupby::single_group(wf.nrows()); - } - - if (count_all_rows && COUNTNA){ - Column coli = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); - outputs.add_column(std::move(coli), std::string(), Grouping::GtoONE); - return outputs; - } - for (size_t i = 0; i < wf.ncols(); ++i) { - bool is_grouped = ctx.has_group_column( - wf.get_frame_id(i), - wf.get_column_id(i) - ); - Column coli = wf.retrieve_column(i); - if (COUNTNA && !ctx.has_groupby() && (coli.stype() == SType::VOID)) { - int64_t nrows = static_cast(ctx.nrows()); - coli = Const_ColumnImpl::make_int_column(1, nrows, SType::INT64); - } else { - coli = evaluate1(std::move(coli), gby, is_grouped); - } - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); - } - return outputs; + std::string name() const override { + return COUNTNA? "countna" + : "count"; } - - Column evaluate1(Column &&col, const Groupby& gby, bool is_grouped) const { + Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped) const { SType stype = col.stype(); switch (stype) { case SType::VOID: @@ -127,27 +71,74 @@ class FExpr_CountNA : public FExpr_Func { } } - template - Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + template + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( + return Column(new Latent_ColumnImpl(new Count_ColumnImpl( std::move(col), stype, gby ))); } } }; + +// gets the count of all rows - nulls are not checked +template +class FExpr_CountNA_AllRows : public FExpr_Func { + public: + FExpr_CountNA_AllRows(){} + + std::string repr() const override { + std::string out = COUNTNA ? "countna(None)" + : "count()"; + return out; + } + + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe wf(ctx); + Groupby gby = ctx.get_groupby(); + Column col; + + if (!gby) { + gby = Groupby::single_group(wf.nrows()); + } + + if (COUNTNA) { + col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); + wf.add_column(std::move(col), std::string(), Grouping::GtoONE); + return wf; + } + + if (ctx.has_groupby()) { + col = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby))); + } else { + auto value = static_cast(ctx.nrows()); + col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } + wf.add_column(std::move(col), "count", Grouping::GtoONE); + return wf; + } + +}; + + static py::oobj pyfn_count(const py::XArgs &args) { auto count = args[0].to_oobj_or_none(); + if (count.is_none()) { + return PyFExpr::make(new FExpr_CountNA_AllRows()); + } return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); } static py::oobj pyfn_countna(const py::XArgs &args) { - auto countna = args[0].to_oobj(); + auto countna = args[0].to_oobj_or_none(); + if (countna.is_none()) { + return PyFExpr::make(new FExpr_CountNA_AllRows()); + } return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); } @@ -163,8 +154,7 @@ DECLARE_PYFN(&pyfn_countna) ->name("countna") ->docs(doc_dt_countna) ->arg_names({"cols"}) - ->n_positional_args(1) - ->n_required_args(1); + ->n_positional_args(1); }} // dt::expr diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 22d1c3820..baf2b472b 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -53,22 +53,16 @@ class FExpr_Mean : public FExpr_ReduceUnary { case SType::BOOL: case SType::INT8: case SType::INT16: - case SType::INT32: + case SType::INT32: case SType::INT64: + case SType::DATE32: + case SType::TIME64: case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); + break; case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); - case SType::DATE32: { - Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); - coli.cast_inplace(SType::DATE32); - return coli; - } - case SType::TIME64: { - Column coli = make(std::move(col), SType::FLOAT64, gby, is_grouped); - coli.cast_inplace(SType::TIME64); - return coli; - } + col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); + break; default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -82,17 +76,13 @@ class FExpr_Mean : public FExpr_ReduceUnary { template - Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); - if (is_grouped) { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby - ))); - } else { - return Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby - ))); - } + + return is_grouped? std::move(col) + : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( + std::move(col), stype, gby + ))); } }; @@ -110,4 +100,4 @@ DECLARE_PYFN(&pyfn_mean) ->n_required_args(1); -}} // dt::expr +}} // dt::expr \ No newline at end of file diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 9fd81fa35..0bba8253e 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -54,17 +54,19 @@ class FExpr_MinMax : public FExpr_ReduceUnary { case SType::INT8: return make(std::move(col), SType::INT8, gby, is_grouped); case SType::INT16: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT16, gby, is_grouped); case SType::INT32: + return make(std::move(col), SType::INT32, gby, is_grouped); case SType::DATE32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::DATE32, gby, is_grouped); case SType::INT64: + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::TIME64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::TIME64, gby, is_grouped); case SType::FLOAT32: return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -73,10 +75,10 @@ class FExpr_MinMax : public FExpr_ReduceUnary { template - Column make(Column&& col, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new MinMax_ColumnImpl( - std::move(col), gby + std::move(col), stype, gby ))); } diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 5284e1fc6..7f68895d9 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -67,15 +67,15 @@ class FExpr_SumProd : public FExpr_ReduceUnary { } - template - Column make(Column &&col, SType stype, const Groupby& gby, bool is_grouped) const { + template + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); if (is_grouped) { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), stype, gby ))); } else { - return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( + return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( std::move(col), stype, gby ))); } @@ -110,4 +110,4 @@ DECLARE_PYFN(&pyfn_prod) ->n_required_args(1); -}} // dt::expr +}} // dt::expr \ No newline at end of file diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index b9ffd884b..2b726857d 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -109,7 +109,7 @@ def corr(col1, col2): # noinspection PyShadowingBuiltins -def sum(iterable, start=0): +def sum(iterable=None, start=0): if (not isinstance(iterable, dict) and (isinstance(iterable, core.FExpr) or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): From 514610f8d54e566c68716620bf23e5a4c07380d8 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 25 Mar 2023 18:19:25 +1100 Subject: [PATCH 104/124] add example for countna when no col is passed --- docs/api/dt/countna.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 932fbab5d..44c7f4a87 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -65,6 +65,16 @@ 0 | 2 [1 row x 1 column] + Get the count if no col is passed: + + >>> df[:, dt.countna()] + | C0 + | int64 + -- + ----- + 0 | 0 + [1 row x 1 column] + + See Also From b0c513a0a854cfdbe45307870e34081bb159ecf5 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sat, 1 Apr 2023 13:29:16 +1100 Subject: [PATCH 105/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index b6e2129b5..4cf17226b 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -116,8 +116,8 @@ class FExpr_CountNA_AllRows : public FExpr_Func { if (ctx.has_groupby()) { col = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby))); } else { - auto value = static_cast(ctx.nrows()); - col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + auto value = static_cast(ctx.nrows()); + col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } wf.add_column(std::move(col), "count", Grouping::GtoONE); return wf; From f28e2de14ee1d5c0f894e6096b8699f410d8cdf9 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sat, 1 Apr 2023 13:29:43 +1100 Subject: [PATCH 106/124] Update src/core/expr/fexpr_count_countna.cc Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_count_countna.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 4cf17226b..a25a9ee68 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -109,7 +109,7 @@ class FExpr_CountNA_AllRows : public FExpr_Func { if (COUNTNA) { col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); - wf.add_column(std::move(col), std::string(), Grouping::GtoONE); + wf.add_column(std::move(col), "countna", Grouping::GtoONE); return wf; } From bf7188963b6ed10cd9e17bda06f63bf87877986c Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:17:33 +1100 Subject: [PATCH 107/124] updates based on feedback --- src/core/column/mean.h | 3 +- src/core/expr/fexpr_count_countna.cc | 44 ++++++++++++++-------------- src/core/expr/fexpr_mean.cc | 8 ++--- src/core/expr/fexpr_sumprod.cc | 13 ++++---- tests/dt/test-countna.py | 3 +- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/core/column/mean.h b/src/core/column/mean.h index f89b52d10..b5010696c 100644 --- a/src/core/column/mean.h +++ b/src/core/column/mean.h @@ -53,4 +53,5 @@ class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt -#endif \ No newline at end of file +#endif + diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index a25a9ee68..cf3920ac8 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -33,7 +33,7 @@ namespace dt { namespace expr { template -class FExpr_CountNA : public FExpr_ReduceUnary { +class FExpr_Count : public FExpr_ReduceUnary { public: using FExpr_ReduceUnary::FExpr_ReduceUnary; @@ -49,22 +49,22 @@ class FExpr_CountNA : public FExpr_ReduceUnary { case SType::VOID: case SType::BOOL: case SType::INT8: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::DATE32: case SType::INT32: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::TIME64: case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -72,14 +72,14 @@ class FExpr_CountNA : public FExpr_ReduceUnary { } template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), stype, gby + std::move(col), SType::INT64, gby ))); } else { return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), stype, gby + std::move(col), SType::INT64, gby ))); } } @@ -88,13 +88,13 @@ class FExpr_CountNA : public FExpr_ReduceUnary { // gets the count of all rows - nulls are not checked template -class FExpr_CountNA_AllRows : public FExpr_Func { +class FExpr_Count_Rows : public FExpr_Func { public: - FExpr_CountNA_AllRows(){} + FExpr_Count_Rows(){} std::string repr() const override { - std::string out = COUNTNA ? "countna(None)" - : "count()"; + std::string out = COUNTNA? "countna(None)" + : "count()"; return out; } @@ -127,19 +127,19 @@ class FExpr_CountNA_AllRows : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { - auto count = args[0].to_oobj_or_none(); - if (count.is_none()) { + auto arg = args[0].to_oobj_or_none(); + if (arg.is_none()) { return PyFExpr::make(new FExpr_CountNA_AllRows()); } - return PyFExpr::make(new FExpr_CountNA(as_fexpr(count))); + return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); } static py::oobj pyfn_countna(const py::XArgs &args) { - auto countna = args[0].to_oobj_or_none(); - if (countna.is_none()) { - return PyFExpr::make(new FExpr_CountNA_AllRows()); + auto arg = args[0].to_oobj_or_none(); + if (arg.is_none()) { + return PyFExpr::make(new FExpr_Count_Rows()); } - return PyFExpr::make(new FExpr_CountNA(as_fexpr(countna))); + return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index baf2b472b..3a0f2c73b 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -58,10 +58,10 @@ class FExpr_Mean : public FExpr_ReduceUnary { case SType::DATE32: case SType::TIME64: case SType::FLOAT64: - col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); + col_out = make(std::move(col), gby, is_grouped); break; case SType::FLOAT32: - col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); + col_out = make(std::move(col), gby, is_grouped); break; default: throw TypeError() @@ -76,12 +76,12 @@ class FExpr_Mean : public FExpr_ReduceUnary { template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } }; diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 7f68895d9..f5f93cee5 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,11 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } } @@ -110,4 +110,5 @@ DECLARE_PYFN(&pyfn_prod) ->n_required_args(1); -}} // dt::expr \ No newline at end of file +}} // dt::expr + diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 4b085e35e..186dcc582 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -91,4 +91,5 @@ def test_dt_countna_None(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) EXP = dt.Frame(C0=[0]) RES = DT[:, dt.countna()] - assert EXP.to_list() == RES.to_list() \ No newline at end of file + assert EXP.to_list() == RES.to_list() + From 74d5089f7057bd6af3ec9f127d826492c1fd473b Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:19:46 +1100 Subject: [PATCH 108/124] remove xfail for countna --- tests/dt/test-countna.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dt/test-countna.py b/tests/dt/test-countna.py index 186dcc582..17ad66d54 100644 --- a/tests/dt/test-countna.py +++ b/tests/dt/test-countna.py @@ -68,7 +68,6 @@ def test_dt_count_na1(src): RES = df[:, dt.countna(f[:])] assert_equals(EXP, RES) -@pytest.mark.xfail(reason="commented out till #3417 is resolved.") def test_dt_count_na2(): DT = dt.Frame(G=[1,1,1,2,2,2], V=[None, None, None, None, 3, 5]) EXP = dt.Frame(G=[1,2], V1=[3,1], V2=[1,0]) @@ -93,3 +92,4 @@ def test_dt_countna_None(): RES = DT[:, dt.countna()] assert EXP.to_list() == RES.to_list() + From d852148bacfff7c3aa9ee62bf40ecdecec5e8cb6 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:32:56 +1100 Subject: [PATCH 109/124] cleanup --- src/core/column/countna_no_args.h | 6 +++--- src/core/expr/fexpr_count_countna.cc | 4 ++-- src/core/expr/fexpr_mean.cc | 10 +++++----- src/core/expr/fexpr_sumprod.cc | 11 +++++------ 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/core/column/countna_no_args.h b/src/core/column/countna_no_args.h index 0e6b1b405..5766de944 100644 --- a/src/core/column/countna_no_args.h +++ b/src/core/column/countna_no_args.h @@ -26,19 +26,19 @@ namespace dt { -class CountAllRows_ColumnImpl : public Virtual_ColumnImpl { +class CountRows_ColumnImpl : public Virtual_ColumnImpl { protected: Groupby gby_; public: - CountAllRows_ColumnImpl(const Groupby& gby) + CountRows_ColumnImpl(const Groupby& gby) : Virtual_ColumnImpl(gby.size(), SType::INT64), gby_(gby) {} ColumnImpl *clone() const override { - return new CountAllRows_ColumnImpl(Groupby(gby_)); + return new CountRows_ColumnImpl(Groupby(gby_)); } size_t n_children() const noexcept override { diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index cf3920ac8..3aacbb1a4 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -114,7 +114,7 @@ class FExpr_Count_Rows : public FExpr_Func { } if (ctx.has_groupby()) { - col = Column(new Latent_ColumnImpl(new CountAllRows_ColumnImpl(gby))); + col = Column(new Latent_ColumnImpl(new CountRows_ColumnImpl(gby))); } else { auto value = static_cast(ctx.nrows()); col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); @@ -129,7 +129,7 @@ class FExpr_Count_Rows : public FExpr_Func { static py::oobj pyfn_count(const py::XArgs &args) { auto arg = args[0].to_oobj_or_none(); if (arg.is_none()) { - return PyFExpr::make(new FExpr_CountNA_AllRows()); + return PyFExpr::make(new FExpr_Count_Rows()); } return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 3a0f2c73b..99c70cb4e 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -58,10 +58,10 @@ class FExpr_Mean : public FExpr_ReduceUnary { case SType::DATE32: case SType::TIME64: case SType::FLOAT64: - col_out = make(std::move(col), gby, is_grouped); + col_out = make(std::move(col), SType::FLOAT64, gby, is_grouped); break; case SType::FLOAT32: - col_out = make(std::move(col), gby, is_grouped); + col_out = make(std::move(col), SType::FLOAT32, gby, is_grouped); break; default: throw TypeError() @@ -76,12 +76,12 @@ class FExpr_Mean : public FExpr_ReduceUnary { template - Column make(Column&& col, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { col.cast_inplace(stype); return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), stype, gby ))); } }; @@ -100,4 +100,4 @@ DECLARE_PYFN(&pyfn_mean) ->n_required_args(1); -}} // dt::expr \ No newline at end of file +}} // dt::expr diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index f5f93cee5..0a5205666 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -55,11 +55,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { case SType::INT16: case SType::INT32: case SType::INT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::INT64, gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT32, gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), SType::FLOAT64, gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), stype, gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), stype, gby ))); } } @@ -111,4 +111,3 @@ DECLARE_PYFN(&pyfn_prod) }} // dt::expr - From 6371cf5731c102143a0996110c7092e9a4a606fd Mon Sep 17 00:00:00 2001 From: samukweku Date: Sat, 1 Apr 2023 15:40:12 +1100 Subject: [PATCH 110/124] updates based on feedback --- src/core/expr/fexpr_minmax.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 0bba8253e..5feda06b5 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -52,21 +52,21 @@ class FExpr_MinMax : public FExpr_ReduceUnary { return Column(new ConstNa_ColumnImpl(gby.size(), stype)); case SType::BOOL: case SType::INT8: - return make(std::move(col), SType::INT8, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT16: - return make(std::move(col), SType::INT16, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT32: - return make(std::move(col), SType::INT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::DATE32: - return make(std::move(col), SType::DATE32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::INT64: - return make(std::move(col), SType::INT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::TIME64: - return make(std::move(col), SType::TIME64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT32: - return make(std::move(col), SType::FLOAT32, gby, is_grouped); + return make(std::move(col), gby, is_grouped); case SType::FLOAT64: - return make(std::move(col), SType::FLOAT64, gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -75,10 +75,10 @@ class FExpr_MinMax : public FExpr_ReduceUnary { template - Column make(Column&& col, SType stype, const Groupby& gby, bool is_grouped) const { + Column make(Column&& col, const Groupby& gby, bool is_grouped) const { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new MinMax_ColumnImpl( - std::move(col), stype, gby + std::move(col), col.stype(), gby ))); } From aa076ac8137ed37e12b3796747d4a9cbc318bc45 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 2 Apr 2023 13:32:41 +1000 Subject: [PATCH 111/124] no need to check for gby --- src/core/expr/fexpr_count_countna.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index 3aacbb1a4..e110fcde5 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -103,10 +103,6 @@ class FExpr_Count_Rows : public FExpr_Func { Groupby gby = ctx.get_groupby(); Column col; - if (!gby) { - gby = Groupby::single_group(wf.nrows()); - } - if (COUNTNA) { col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); wf.add_column(std::move(col), "countna", Grouping::GtoONE); From a9a45dd354d895678616307aca318cac912895a7 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 2 Apr 2023 13:33:52 +1000 Subject: [PATCH 112/124] fix indent --- src/core/expr/fexpr_count_countna.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc index e110fcde5..6a863aa80 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count_countna.cc @@ -112,9 +112,9 @@ class FExpr_Count_Rows : public FExpr_Func { if (ctx.has_groupby()) { col = Column(new Latent_ColumnImpl(new CountRows_ColumnImpl(gby))); } else { - auto value = static_cast(ctx.nrows()); - col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } + auto value = static_cast(ctx.nrows()); + col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); + } wf.add_column(std::move(col), "count", Grouping::GtoONE); return wf; } From f53f1c8d4ce3cc9ce9c12af1a89baa4fc23dad44 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 18 Apr 2023 09:05:51 +1000 Subject: [PATCH 113/124] Update docs/api/dt/countna.rst Co-authored-by: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/countna.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 44c7f4a87..3c3ed4cf3 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -65,7 +65,7 @@ 0 | 2 [1 row x 1 column] - Get the count if no col is passed: + When no `cols` is passed, the number of missing values returned is zero: >>> df[:, dt.countna()] | C0 From dfa017fd304729f4695af7e00e8c53dfb2249a7b Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 18 Apr 2023 09:06:32 +1000 Subject: [PATCH 114/124] focus only on count --- src/datatable/expr/reduce.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index 2b726857d..d696abac1 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -109,17 +109,17 @@ def corr(col1, col2): # noinspection PyShadowingBuiltins -def sum(iterable=None, start=0): +def sum(iterable, start=0): if (not isinstance(iterable, dict) and (isinstance(iterable, core.FExpr) - or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): + or (hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): return core.sum(iterable) elif isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): return core.sum(iterable) elif isinstance(iterable, core.Frame): return iterable.sum() else: - return _builtin_sum(iterable, start) + return _builtin_sum(iterable, start) # noinspection PyShadowingBuiltins def min(*args, **kwds): From 4f5d071e6d7fbb786d82ee6be250f99fc287a48b Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 18 Apr 2023 09:07:23 +1000 Subject: [PATCH 115/124] return FExpr --- docs/api/dt/countna.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 3c3ed4cf3..0faa835e7 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -14,7 +14,7 @@ cols: FExpr Input columns. - return: Expr + return: FExpr f-expression having one row, and the same names and number of columns as in `cols`. All the returned column stypes are `int64`. If `cols` is not provided, 0 is returned per group. From 8397a8ff6955daf00dca81070bebbd006d3d5b29 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Tue, 18 Apr 2023 16:31:59 +1000 Subject: [PATCH 116/124] Update countna.h --- src/core/column/countna.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/column/countna.h b/src/core/column/countna.h index c836866a7..7b488162c 100644 --- a/src/core/column/countna.h +++ b/src/core/column/countna.h @@ -57,3 +57,4 @@ class Count_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt #endif + From d61ac2e2516e5607aef1206cf2e781050e027371 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 13:53:21 -0700 Subject: [PATCH 117/124] Various fixes --- docs/api/dt/count.rst | 60 ++++++++------- docs/api/dt/countna.rst | 53 +++++++------ .../column/{countna_no_args.h => count.h} | 47 ++++++++++-- src/core/column/countna.h | 60 --------------- src/core/column/reduce_unary.h | 14 ++-- ...{fexpr_count_countna.cc => fexpr_count.cc} | 74 +++++++++---------- src/core/expr/fexpr_mean.cc | 3 +- src/core/expr/fexpr_minmax.cc | 4 +- src/core/expr/fexpr_sumprod.cc | 4 +- src/core/expr/head_func.cc | 8 -- src/core/expr/head_reduce_nullary.cc | 51 ------------- src/datatable/__init__.py | 4 +- src/datatable/expr/__init__.py | 4 +- src/datatable/expr/reduce.py | 39 +++++----- 14 files changed, 173 insertions(+), 252 deletions(-) rename src/core/column/{countna_no_args.h => count.h} (59%) delete mode 100644 src/core/column/countna.h rename src/core/expr/{fexpr_count_countna.cc => fexpr_count.cc} (72%) delete mode 100644 src/core/expr/head_reduce_nullary.cc diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index a62f60537..6b4e20e11 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -1,27 +1,26 @@ .. xfunction:: datatable.count - :src: src/core/expr/fexpr_count_countna.cc pyfn_count + :src: src/core/expr/fexpr_count.cc pyfn_count :cvar: doc_dt_count :tests: tests/test-reduce.py :signature: count(cols=None) - Calculate the number of non-missing values for each column from `cols`. When `cols` is not provided, - calculate the total number of rows. + Count non-missing values for each column from `cols`. When called + with no arguments, simply count the number of rows. This function + is group-aware. Parameters ---------- cols: FExpr Input columns if any. - return: Expr - f-expression having one row, and the same names and number of columns - as in `cols`. All the returned column stypes are `int64`. - If `cols` is not provided, the total number of rows - (a combination of the count of missing and non-missing values) is returned. - - except: TypeError - The exception is raised when one of the columns from `cols` - has a non-numeric and non-string type. + return: FExpr + f-expression that counts the number of non-missing values + for each column from `cols`. If `cols` is not provided, + it calculates the number of rows. The returned f-expression + has as many rows as there are groups, it also has the same names and + number of columns as in `cols`. All the resulting column's stypes + are `int64`. Examples @@ -31,39 +30,48 @@ >>> from datatable import dt, f >>> - >>> df = dt.Frame({'A': [1, 1, 2, 1, 2], + >>> DT = dt.Frame({'A': [None, 1, 2, None, 2], ... 'B': [None, 2, 3, 4, 5], ... 'C': [1, 2, 1, 1, 2]}) - >>> df + >>> DT | A B C | int32 int32 int32 -- + ----- ----- ----- - 0 | 1 NA 1 + 0 | NA NA 1 1 | 1 2 2 2 | 2 3 1 - 3 | 1 4 1 + 3 | NA 4 1 4 | 2 5 2 [5 rows x 3 columns] - Get the count of all rows:: + Count non-missing values in all the columns:: - >>> df[:, dt.count()] - | count - | int32 - -- + ----- - 0 | 5 - [1 row x 1 column] + >>> DT[:, dt.count(f[:])] + | A B C + | int64 int64 int64 + -- + ----- ----- ----- + 0 | 3 4 5 + [1 row x 3 columns] - Get the count of column `B` (note how the null row is excluded from the - count result):: + Count non-missing values in the column `B` only:: - >>> df[:, dt.count(f.B)] + >>> DT[:, dt.count(f.B)] | B | int64 -- + ----- 0 | 4 [1 row x 1 column] + Get the number of rows in a frame:: + + >>> DT[:, dt.count()] + | count + | int64 + -- + ----- + 0 | 5 + [1 row x 1 column] + + See Also -------- diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 0faa835e7..9ebfe1e08 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -1,13 +1,15 @@ .. xfunction:: datatable.countna - :src: src/core/expr/fexpr_count_countna.cc pyfn_countna + :src: src/core/expr/fexpr_count.cc pyfn_countna :tests: tests/test-reduce.py :cvar: doc_dt_countna - :signature: countna(cols) + :signature: countna(cols=None) .. x-version-added:: 1.1.0 - Count the number of NA values for each column from `cols`. + Count missing values for each column from `cols`. This function + is group-aware. + Parameters ---------- @@ -15,14 +17,12 @@ Input columns. return: FExpr - f-expression having one row, and the same names and number of columns - as in `cols`. All the returned column stypes are `int64`. - If `cols` is not provided, 0 is returned per group. - - except: TypeError - The exception is raised when one of the columns from `cols` - has an obj64 type. - + f-expression that counts the number of missing values + for each column from `cols`. If `cols` is not provided, + it will return `0` for each of the frame's group. + The returned f-expression has as many rows as there are groups, + it also has the same names and number of columns as in `cols`. + All the resulting column's stypes are `int64`. Examples @@ -32,42 +32,41 @@ >>> from datatable import dt, f >>> - >>> df = dt.Frame({'A': [1, 1, 2, None, 1, 2], - ... 'B': [None, 2, 3, 4, None, 5], - ... 'C': [1, 2, 1, 1, 2, 4]}) - >>> df + >>> DT = dt.Frame({'A': [None, 1, 2, None, 2], + ... 'B': [None, 2, 3, 4, 5], + ... 'C': [1, 2, 1, 1, 2]}) + >>> DT | A B C | int32 int32 int32 -- + ----- ----- ----- - 0 | 1 NA 1 + 0 | NA NA 1 1 | 1 2 2 2 | 2 3 1 3 | NA 4 1 - 4 | 1 NA 2 - 5 | 2 5 4 - [6 rows x 3 columns] + 4 | 2 5 2 + [5 rows x 3 columns] - Get the count of NAs of all rows: + Count missing values in all the columns:: - >>> df[:, dt.countna(f[:])] + >>> DT[:, dt.countna(f[:])] | A B C | int64 int64 int64 -- + ----- ----- ----- - 0 | 1 2 0 + 0 | 2 1 0 [1 row x 3 columns] - Get the count of NAs of column `B`: + Count missing values in the column `B` only:: - >>> df[:, dt.countna(f.B)] + >>> DT[:, dt.countna(f.B)] | B | int64 -- + ----- - 0 | 2 + 0 | 1 [1 row x 1 column] - When no `cols` is passed, the number of missing values returned is zero: + When no `cols` is passed, this function will always return zero:: - >>> df[:, dt.countna()] + >>> DT[:, dt.countna()] | C0 | int64 -- + ----- diff --git a/src/core/column/countna_no_args.h b/src/core/column/count.h similarity index 59% rename from src/core/column/countna_no_args.h rename to src/core/column/count.h index 5766de944..4facb861f 100644 --- a/src/core/column/countna_no_args.h +++ b/src/core/column/count.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019-2021 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -19,26 +19,59 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS // IN THE SOFTWARE. //------------------------------------------------------------------------------ -#ifndef dt_COLUMN_COUNTNA_ALLROWS_h -#define dt_COLUMN_COUNTNA_ALLROWS_h +#ifndef dt_COLUMN_COUNT_h +#define dt_COLUMN_COUNT_h #include "column/virtual.h" +#include "column/reduce_unary.h" #include "stype.h" namespace dt { -class CountRows_ColumnImpl : public Virtual_ColumnImpl { +template +class CountUnary_ColumnImpl : public ReduceUnary_ColumnImpl { + public: + using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; + + bool get_element(size_t i, int64_t* out) const override { + T value; + size_t i0, i1; + this->gby_.get_group(i, &i0, &i1); + int64_t count = 0; + if (IS_GROUPED) { + bool is_valid = this->col_.get_element(i, &value); + if (COUNTNA) { + count = is_valid? 0 + : static_cast(i1 - i0); + } else { + count = is_valid? static_cast(i1 - i0) + : 0; + } + } else { + for (size_t gi = i0; gi < i1; ++gi) { + bool is_valid = this->col_.get_element(gi, &value); + count += COUNTNA != is_valid; + } + } + *out = count; + return true; // *out is always valid + } +}; + + + +class CountNullary_ColumnImpl : public Virtual_ColumnImpl { protected: Groupby gby_; public: - CountRows_ColumnImpl(const Groupby& gby) + CountNullary_ColumnImpl(const Groupby& gby) : Virtual_ColumnImpl(gby.size(), SType::INT64), gby_(gby) {} ColumnImpl *clone() const override { - return new CountRows_ColumnImpl(Groupby(gby_)); + return new CountNullary_ColumnImpl(Groupby(gby_)); } size_t n_children() const noexcept override { @@ -55,7 +88,5 @@ class CountRows_ColumnImpl : public Virtual_ColumnImpl { }; - - } // namespace dt #endif diff --git a/src/core/column/countna.h b/src/core/column/countna.h deleted file mode 100644 index 7b488162c..000000000 --- a/src/core/column/countna.h +++ /dev/null @@ -1,60 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#ifndef dt_COLUMN_COUNTNA_h -#define dt_COLUMN_COUNTNA_h -#include "column/reduce_unary.h" -namespace dt { - - -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { - public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - - bool get_element(size_t i, int64_t* out) const override { - T value; - size_t i0, i1; - this->gby_.get_group(i, &i0, &i1); - int64_t count = 0; - if (IS_GROUPED){ - bool isvalid = this->col_.get_element(i, &value); - if (COUNTNA){ - count = isvalid? 0 : static_cast(i1 - i0); - } else { - count = isvalid? static_cast(i1 - i0) : 0; - } - *out = count; - return true; // *out is not NA - } else { - for (size_t gi = i0; gi < i1; ++gi) { - bool isvalid = this->col_.get_element(gi, &value); - count += COUNTNA? !isvalid : isvalid; - } - *out = count; - return true; // *out is not NA - } - } -}; - -} // namespace dt -#endif - diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index ec58ba745..9f8729ce0 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai +// Copyright 2022-2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -32,10 +32,9 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { Column col_; Groupby gby_; - public: - ReduceUnary_ColumnImpl(Column &&col, SType stype, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), stype), + ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby, SType stype_out) + : Virtual_ColumnImpl(gby.size(), stype_out), col_(std::move(col)), gby_(gby) { @@ -43,8 +42,13 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { } + ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) + : ReduceUnary_ColumnImpl(std::move(col), gby, col.stype()) + {} + + ColumnImpl *clone() const override { - return new ReduceUnary_ColumnImpl(Column(col_), this->stype(), Groupby(gby_)); + return new ReduceUnary_ColumnImpl(Column(col_), Groupby(gby_), this->stype()); } diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count.cc similarity index 72% rename from src/core/expr/fexpr_count_countna.cc rename to src/core/expr/fexpr_count.cc index 6a863aa80..5ee19b94c 100644 --- a/src/core/expr/fexpr_count_countna.cc +++ b/src/core/expr/fexpr_count.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2022-2023 H2O.ai +// Copyright 2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -21,8 +21,7 @@ //------------------------------------------------------------------------------ #include "column/const.h" #include "column/latent.h" -#include "column/countna.h" -#include "column/countna_no_args.h" +#include "column/count.h" #include "documentation.h" #include "expr/fexpr_reduce_unary.h" #include "expr/eval_context.h" @@ -32,8 +31,9 @@ namespace dt { namespace expr { + template -class FExpr_Count : public FExpr_ReduceUnary { +class FExpr_CountUnary : public FExpr_ReduceUnary { public: using FExpr_ReduceUnary::FExpr_ReduceUnary; @@ -43,7 +43,8 @@ class FExpr_Count : public FExpr_ReduceUnary { : "count"; } - Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped) const { + + Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped) const override { SType stype = col.stype(); switch (stype) { case SType::VOID: @@ -71,71 +72,68 @@ class FExpr_Count : public FExpr_ReduceUnary { } } + template Column make(Column&& col, const Groupby& gby, bool is_grouped) const { if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), SType::INT64, gby + return Column(new Latent_ColumnImpl(new CountUnary_ColumnImpl( + std::move(col), gby, SType::INT64 ))); } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), SType::INT64, gby + return Column(new Latent_ColumnImpl(new CountUnary_ColumnImpl( + std::move(col), gby, SType::INT64 ))); } } + }; -// gets the count of all rows - nulls are not checked + template -class FExpr_Count_Rows : public FExpr_Func { +class FExpr_CountNullary : public FExpr_Func { public: - FExpr_Count_Rows(){} - std::string repr() const override { - std::string out = COUNTNA? "countna(None)" + std::string out = COUNTNA? "countna()" : "count()"; return out; } - Workframe evaluate_n(EvalContext &ctx) const override { - Workframe wf(ctx); - Groupby gby = ctx.get_groupby(); - Column col; - if (COUNTNA) { - col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); - wf.add_column(std::move(col), "countna", Grouping::GtoONE); - return wf; - } - - if (ctx.has_groupby()) { - col = Column(new Latent_ColumnImpl(new CountRows_ColumnImpl(gby))); - } else { + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe wf(ctx); + Groupby gby = ctx.get_groupby(); + Column col; + + if (COUNTNA) { + col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); + wf.add_column(std::move(col), "countna", Grouping::GtoONE); + return wf; + } + + if (ctx.has_groupby()) { + col = Column(new Latent_ColumnImpl(new CountNullary_ColumnImpl(gby))); + } else { auto value = static_cast(ctx.nrows()); col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); } - wf.add_column(std::move(col), "count", Grouping::GtoONE); - return wf; - } + wf.add_column(std::move(col), "count", Grouping::GtoONE); + return wf; + } }; static py::oobj pyfn_count(const py::XArgs &args) { auto arg = args[0].to_oobj_or_none(); - if (arg.is_none()) { - return PyFExpr::make(new FExpr_Count_Rows()); - } - return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); + return arg.is_none()? PyFExpr::make(new FExpr_CountNullary()) + : PyFExpr::make(new FExpr_CountUnary(as_fexpr(arg))); } static py::oobj pyfn_countna(const py::XArgs &args) { auto arg = args[0].to_oobj_or_none(); - if (arg.is_none()) { - return PyFExpr::make(new FExpr_Count_Rows()); - } - return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); + return arg.is_none()? PyFExpr::make(new FExpr_CountNullary()) + : PyFExpr::make(new FExpr_CountUnary(as_fexpr(arg))); } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index 99c70cb4e..68d5ce885 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -71,6 +71,7 @@ class FExpr_Mean : public FExpr_ReduceUnary { if (stype == SType::DATE32 || stype == SType::TIME64) { col_out.cast_inplace(stype); } + return col_out; } @@ -81,7 +82,7 @@ class FExpr_Mean : public FExpr_ReduceUnary { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby + std::move(col), gby ))); } }; diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 5feda06b5..3a5faf5f6 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -56,11 +56,9 @@ class FExpr_MinMax : public FExpr_ReduceUnary { case SType::INT16: return make(std::move(col), gby, is_grouped); case SType::INT32: - return make(std::move(col), gby, is_grouped); case SType::DATE32: return make(std::move(col), gby, is_grouped); case SType::INT64: - return make(std::move(col), gby, is_grouped); case SType::TIME64: return make(std::move(col), gby, is_grouped); case SType::FLOAT32: @@ -78,7 +76,7 @@ class FExpr_MinMax : public FExpr_ReduceUnary { Column make(Column&& col, const Groupby& gby, bool is_grouped) const { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new MinMax_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), gby ))); } diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 0a5205666..ab1adc0bc 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), gby ))); } } diff --git a/src/core/expr/head_func.cc b/src/core/expr/head_func.cc index 9cb82e629..98d74ea12 100644 --- a/src/core/expr/head_func.cc +++ b/src/core/expr/head_func.cc @@ -120,13 +120,6 @@ static ptrHead make_binop(Op op, const py::otuple& params) { } -static ptrHead make_reduce0(Op op, const py::otuple& params) { - xassert(params.size() == 0); - (void) params; - return ptrHead(new Head_Reduce_Nullary(op)); -} - - static ptrHead make_reduce1(Op op, const py::otuple& params) { xassert(params.size() == 0); (void) params; @@ -153,7 +146,6 @@ void Head_Func::init() { factory[static_cast(Op::SETPLUS)] = make_colsetop; factory[static_cast(Op::SETMINUS)] = make_colsetop; factory[static_cast(Op::SHIFTFN)] = &Head_Func_Shift::make; - factory[static_cast(Op::COUNT0)] = make_reduce0; factory[static_cast(Op::COV)] = make_reduce2; factory[static_cast(Op::CORR)] = make_reduce2; factory[static_cast(Op::ARCTAN2)] = make_binop; diff --git a/src/core/expr/head_reduce_nullary.cc b/src/core/expr/head_reduce_nullary.cc deleted file mode 100644 index 2041fd048..000000000 --- a/src/core/expr/head_reduce_nullary.cc +++ /dev/null @@ -1,51 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2019-2020 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#include "column/const.h" -#include "expr/eval_context.h" -#include "expr/expr.h" -#include "expr/head_reduce.h" -#include "expr/workframe.h" -#include "utils/assert.h" -#include "utils/exceptions.h" -namespace dt { -namespace expr { - - - - -//------------------------------------------------------------------------------ -// Head_Reduce_Nullary -//------------------------------------------------------------------------------ - -Workframe Head_Reduce_Nullary::evaluate_n( - const vecExpr& args, EvalContext& ctx) const -{ - xassert(args.size() == 0); - (void) args; - throw RuntimeError() << "Unknown op " << static_cast(op) - << " in Head_Reduce_Nullary"; -} - - - - -}} // namespace dt::expr diff --git a/src/datatable/__init__.py b/src/datatable/__init__.py index e3ad1437e..8010f4674 100644 --- a/src/datatable/__init__.py +++ b/src/datatable/__init__.py @@ -21,8 +21,8 @@ # IN THE SOFTWARE. #------------------------------------------------------------------------------- from .frame import Frame -from .expr import (min, max, sd, isna, sum, count, first, abs, exp, - last, log, log10, f, g, median, cov, corr, countna, nunique) +from .expr import (min, max, sd, isna, sum, count, countna, first, abs, exp, + last, log, log10, f, g, median, cov, corr, nunique) from .lib._datatable import ( as_type, by, diff --git a/src/datatable/expr/__init__.py b/src/datatable/expr/__init__.py index dd0f5cae7..da91b8a9c 100644 --- a/src/datatable/expr/__init__.py +++ b/src/datatable/expr/__init__.py @@ -23,13 +23,14 @@ from .expr import f, g, Expr from .math import abs, log, log10, exp, isna from .reduce import ( - sum, count, first, last, median, min, max, sd, cov, corr, countna, nunique) + sum, count, countna, first, last, median, min, max, sd, cov, corr, nunique) __all__ = ( "Expr", "abs", "corr", "count", + "countna", "cov", "exp", "f", @@ -45,5 +46,4 @@ "sd", "sum", "nunique", - "countna", ) diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index d696abac1..c35e80bad 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -28,8 +28,9 @@ __all__ = ( "corr", - "count", "cov", + "count", + "countna", "first", "last", "max", @@ -38,31 +39,30 @@ "nunique", "sd", "sum", - "countna", ) def count(iterable=None): - if iterable is None: + if isinstance(iterable, (core.FExpr)): return core.count(iterable) - if (not isinstance(iterable, dict) - and (isinstance(iterable, core.FExpr) - or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): - return core.count(iterable) - if isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): - return core.count(iterable) - return _builtin_sum((x is not None) for x in iterable) + elif iterable is None: + return core.count() + else: + return _builtin_sum((x is not None) for x in iterable) -def nunique(iterable=None): - return Expr(OpCodes.NUNIQUE, (iterable,)) +def countna(iterable=None): + if isinstance(iterable, (core.FExpr)): + return core.countna(iterable) + elif iterable is None: + return core.countna() + else: + return _builtin_sum((x is None) for x in iterable) -def countna(iterable=None): - if isinstance(iterable, core.Frame): - return iterable.countna() - return core.countna(iterable) +def nunique(iterable=None): + return Expr(OpCodes.NUNIQUE, (iterable,)) def first(iterable): @@ -119,13 +119,14 @@ def sum(iterable, start=0): elif isinstance(iterable, core.Frame): return iterable.sum() else: - return _builtin_sum(iterable, start) + return _builtin_sum(iterable, start) + # noinspection PyShadowingBuiltins def min(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.min(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.min(args) @@ -139,7 +140,7 @@ def min(*args, **kwds): def max(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.max(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.max(args) From 39ad7d4e943943ac47d8e10a9de6404b87cf5758 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 14:06:15 -0700 Subject: [PATCH 118/124] Remove unused files --- src/core/column/countna.h | 60 ----------- src/core/expr/fexpr_count_countna.cc | 156 --------------------------- 2 files changed, 216 deletions(-) delete mode 100644 src/core/column/countna.h delete mode 100644 src/core/expr/fexpr_count_countna.cc diff --git a/src/core/column/countna.h b/src/core/column/countna.h deleted file mode 100644 index 7b488162c..000000000 --- a/src/core/column/countna.h +++ /dev/null @@ -1,60 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2023 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#ifndef dt_COLUMN_COUNTNA_h -#define dt_COLUMN_COUNTNA_h -#include "column/reduce_unary.h" -namespace dt { - - -template -class Count_ColumnImpl : public ReduceUnary_ColumnImpl { - public: - using ReduceUnary_ColumnImpl::ReduceUnary_ColumnImpl; - - bool get_element(size_t i, int64_t* out) const override { - T value; - size_t i0, i1; - this->gby_.get_group(i, &i0, &i1); - int64_t count = 0; - if (IS_GROUPED){ - bool isvalid = this->col_.get_element(i, &value); - if (COUNTNA){ - count = isvalid? 0 : static_cast(i1 - i0); - } else { - count = isvalid? static_cast(i1 - i0) : 0; - } - *out = count; - return true; // *out is not NA - } else { - for (size_t gi = i0; gi < i1; ++gi) { - bool isvalid = this->col_.get_element(gi, &value); - count += COUNTNA? !isvalid : isvalid; - } - *out = count; - return true; // *out is not NA - } - } -}; - -} // namespace dt -#endif - diff --git a/src/core/expr/fexpr_count_countna.cc b/src/core/expr/fexpr_count_countna.cc deleted file mode 100644 index 6a863aa80..000000000 --- a/src/core/expr/fexpr_count_countna.cc +++ /dev/null @@ -1,156 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2022-2023 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#include "column/const.h" -#include "column/latent.h" -#include "column/countna.h" -#include "column/countna_no_args.h" -#include "documentation.h" -#include "expr/fexpr_reduce_unary.h" -#include "expr/eval_context.h" -#include "expr/workframe.h" -#include "python/xargs.h" -#include "stype.h" -namespace dt { -namespace expr { - -template -class FExpr_Count : public FExpr_ReduceUnary { - public: - using FExpr_ReduceUnary::FExpr_ReduceUnary; - - - std::string name() const override { - return COUNTNA? "countna" - : "count"; - } - - Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped) const { - SType stype = col.stype(); - switch (stype) { - case SType::VOID: - case SType::BOOL: - case SType::INT8: - return make(std::move(col), gby, is_grouped); - case SType::INT16: - return make(std::move(col), gby, is_grouped); - case SType::DATE32: - case SType::INT32: - return make(std::move(col), gby, is_grouped); - case SType::TIME64: - case SType::INT64: - return make(std::move(col), gby, is_grouped); - case SType::FLOAT32: - return make(std::move(col), gby, is_grouped); - case SType::FLOAT64: - return make(std::move(col), gby, is_grouped); - case SType::STR32: - case SType::STR64: - return make(std::move(col), gby, is_grouped); - default: - throw TypeError() - << "Invalid column of type `" << stype << "` in " << repr(); - } - } - - template - Column make(Column&& col, const Groupby& gby, bool is_grouped) const { - if (is_grouped) { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), SType::INT64, gby - ))); - } else { - return Column(new Latent_ColumnImpl(new Count_ColumnImpl( - std::move(col), SType::INT64, gby - ))); - } - } -}; - - -// gets the count of all rows - nulls are not checked -template -class FExpr_Count_Rows : public FExpr_Func { - public: - FExpr_Count_Rows(){} - - std::string repr() const override { - std::string out = COUNTNA? "countna(None)" - : "count()"; - return out; - } - - Workframe evaluate_n(EvalContext &ctx) const override { - Workframe wf(ctx); - Groupby gby = ctx.get_groupby(); - Column col; - - if (COUNTNA) { - col = Const_ColumnImpl::make_int_column(gby.size(), 0, SType::INT64); - wf.add_column(std::move(col), "countna", Grouping::GtoONE); - return wf; - } - - if (ctx.has_groupby()) { - col = Column(new Latent_ColumnImpl(new CountRows_ColumnImpl(gby))); - } else { - auto value = static_cast(ctx.nrows()); - col = Const_ColumnImpl::make_int_column(1, value, SType::INT64); - } - wf.add_column(std::move(col), "count", Grouping::GtoONE); - return wf; - } - -}; - - -static py::oobj pyfn_count(const py::XArgs &args) { - auto arg = args[0].to_oobj_or_none(); - if (arg.is_none()) { - return PyFExpr::make(new FExpr_Count_Rows()); - } - return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); -} - -static py::oobj pyfn_countna(const py::XArgs &args) { - auto arg = args[0].to_oobj_or_none(); - if (arg.is_none()) { - return PyFExpr::make(new FExpr_Count_Rows()); - } - return PyFExpr::make(new FExpr_Count(as_fexpr(arg))); -} - - -DECLARE_PYFN(&pyfn_count) - ->name("count") - ->docs(doc_dt_count) - ->arg_names({"cols"}) - ->n_positional_args(1); - - -DECLARE_PYFN(&pyfn_countna) - ->name("countna") - ->docs(doc_dt_countna) - ->arg_names({"cols"}) - ->n_positional_args(1); - - -}} // dt::expr From 7b63805569adf70856a5ffed2b5ba88ea1e1a4fd Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 14:16:28 -0700 Subject: [PATCH 119/124] Restore changes --- src/core/column/reduce_unary.h | 11 ------ src/core/expr/fexpr_mean.cc | 2 +- src/core/expr/fexpr_minmax.cc | 2 +- src/core/expr/fexpr_sumprod.cc | 4 +-- src/core/expr/head_reduce_nullary.cc | 51 ---------------------------- src/datatable/expr/reduce.py | 28 ++------------- 6 files changed, 6 insertions(+), 92 deletions(-) delete mode 100644 src/core/expr/head_reduce_nullary.cc diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 1d7da4936..9f8729ce0 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -32,16 +32,9 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { Column col_; Groupby gby_; -<<<<<<< HEAD public: ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby, SType stype_out) : Virtual_ColumnImpl(gby.size(), stype_out), -======= - - public: - ReduceUnary_ColumnImpl(Column &&col, SType stype, const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), stype), ->>>>>>> 8397a8ff6955daf00dca81070bebbd006d3d5b29 col_(std::move(col)), gby_(gby) { @@ -55,11 +48,7 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { ColumnImpl *clone() const override { -<<<<<<< HEAD return new ReduceUnary_ColumnImpl(Column(col_), Groupby(gby_), this->stype()); -======= - return new ReduceUnary_ColumnImpl(Column(col_), this->stype(), Groupby(gby_)); ->>>>>>> 8397a8ff6955daf00dca81070bebbd006d3d5b29 } diff --git a/src/core/expr/fexpr_mean.cc b/src/core/expr/fexpr_mean.cc index fdc0f6bd9..68d5ce885 100644 --- a/src/core/expr/fexpr_mean.cc +++ b/src/core/expr/fexpr_mean.cc @@ -82,7 +82,7 @@ class FExpr_Mean : public FExpr_ReduceUnary { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new Mean_ColumnImpl( - std::move(col), stype, gby + std::move(col), gby ))); } }; diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index 5feda06b5..c1a56383d 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -78,7 +78,7 @@ class FExpr_MinMax : public FExpr_ReduceUnary { Column make(Column&& col, const Groupby& gby, bool is_grouped) const { return is_grouped? std::move(col) : Column(new Latent_ColumnImpl(new MinMax_ColumnImpl( - std::move(col), col.stype(), gby + std::move(col), gby ))); } diff --git a/src/core/expr/fexpr_sumprod.cc b/src/core/expr/fexpr_sumprod.cc index 0a5205666..ab1adc0bc 100644 --- a/src/core/expr/fexpr_sumprod.cc +++ b/src/core/expr/fexpr_sumprod.cc @@ -72,11 +72,11 @@ class FExpr_SumProd : public FExpr_ReduceUnary { col.cast_inplace(stype); if (is_grouped) { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), gby ))); } else { return Column(new Latent_ColumnImpl(new SumProd_ColumnImpl( - std::move(col), stype, gby + std::move(col), gby ))); } } diff --git a/src/core/expr/head_reduce_nullary.cc b/src/core/expr/head_reduce_nullary.cc deleted file mode 100644 index 2041fd048..000000000 --- a/src/core/expr/head_reduce_nullary.cc +++ /dev/null @@ -1,51 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2019-2020 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#include "column/const.h" -#include "expr/eval_context.h" -#include "expr/expr.h" -#include "expr/head_reduce.h" -#include "expr/workframe.h" -#include "utils/assert.h" -#include "utils/exceptions.h" -namespace dt { -namespace expr { - - - - -//------------------------------------------------------------------------------ -// Head_Reduce_Nullary -//------------------------------------------------------------------------------ - -Workframe Head_Reduce_Nullary::evaluate_n( - const vecExpr& args, EvalContext& ctx) const -{ - xassert(args.size() == 0); - (void) args; - throw RuntimeError() << "Unknown op " << static_cast(op) - << " in Head_Reduce_Nullary"; -} - - - - -}} // namespace dt::expr diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a4ba2bf1d..69e14d87f 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -44,24 +44,12 @@ def count(iterable=None): -<<<<<<< HEAD if isinstance(iterable, (core.FExpr)): return core.count(iterable) elif iterable is None: return core.count() else: return _builtin_sum((x is not None) for x in iterable) -======= - if iterable is None: - return core.count(iterable) - if (not isinstance(iterable, dict) - and (isinstance(iterable, core.FExpr) - or (iterable and hasattr(iterable, "__getitem__") and isinstance(iterable[0], core.FExpr)))): - return core.count(iterable) - if isinstance(iterable, dict) and isinstance([*iterable.values()][0], core.FExpr): - return core.count(iterable) - return _builtin_sum((x is not None) for x in iterable) ->>>>>>> 8397a8ff6955daf00dca81070bebbd006d3d5b29 def countna(iterable=None): @@ -73,15 +61,8 @@ def countna(iterable=None): return _builtin_sum((x is None) for x in iterable) -<<<<<<< HEAD def nunique(iterable=None): return Expr(OpCodes.NUNIQUE, (iterable,)) -======= -def countna(iterable=None): - if isinstance(iterable, core.Frame): - return iterable.countna() - return core.countna(iterable) ->>>>>>> 8397a8ff6955daf00dca81070bebbd006d3d5b29 def first(iterable): @@ -107,7 +88,6 @@ def last(iterable): return x - def sd(expr): return Expr(OpCodes.STDEV, (expr,)) @@ -138,18 +118,14 @@ def sum(iterable, start=0): elif isinstance(iterable, core.Frame): return iterable.sum() else: -<<<<<<< HEAD return _builtin_sum(iterable, start) -======= - return _builtin_sum(iterable, start) ->>>>>>> 8397a8ff6955daf00dca81070bebbd006d3d5b29 # noinspection PyShadowingBuiltins def min(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.min(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.min(args) @@ -163,7 +139,7 @@ def min(*args, **kwds): def max(*args, **kwds): if (len(args) == 1 and (not isinstance(args[0], dict)) and (isinstance(args[0], (Expr, core.FExpr)) - or (args[0] and hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): + or (hasattr(args[0], "__getitem__") and isinstance(args[0][0], (Expr, core.FExpr))))): return core.max(args) elif len(args) == 1 and isinstance(args[0], dict) and isinstance([*args[0].values()][0], (Expr, core.FExpr)): return core.max(args) From c1e75b658bcd57b552c125e0bdc6a6b9b5b80ee0 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 14:21:41 -0700 Subject: [PATCH 120/124] Remove one more unused file --- src/core/column/countna_no_args.h | 61 ------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 src/core/column/countna_no_args.h diff --git a/src/core/column/countna_no_args.h b/src/core/column/countna_no_args.h deleted file mode 100644 index 5766de944..000000000 --- a/src/core/column/countna_no_args.h +++ /dev/null @@ -1,61 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2019-2021 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#ifndef dt_COLUMN_COUNTNA_ALLROWS_h -#define dt_COLUMN_COUNTNA_ALLROWS_h -#include "column/virtual.h" -#include "stype.h" -namespace dt { - - -class CountRows_ColumnImpl : public Virtual_ColumnImpl { - protected: - Groupby gby_; - - public: - CountRows_ColumnImpl(const Groupby& gby) - : Virtual_ColumnImpl(gby.size(), SType::INT64), - gby_(gby) - {} - - - ColumnImpl *clone() const override { - return new CountRows_ColumnImpl(Groupby(gby_)); - } - - size_t n_children() const noexcept override { - return 0; - } - - - bool get_element(size_t i, int64_t* out) const override { - size_t i0, i1; - this->gby_.get_group(i, &i0, &i1); - *out = static_cast(i1 - i0); - return true; - } -}; - - - - -} // namespace dt -#endif From eb77f2fcaeed4ec80314958805799ce0c1577a01 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 14:26:30 -0700 Subject: [PATCH 121/124] More fixes --- src/core/column/reduce_unary.h | 2 +- src/core/expr/fexpr_minmax.cc | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index 9f8729ce0..ef66120a9 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -31,7 +31,7 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { protected: Column col_; Groupby gby_; - + public: ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby, SType stype_out) : Virtual_ColumnImpl(gby.size(), stype_out), diff --git a/src/core/expr/fexpr_minmax.cc b/src/core/expr/fexpr_minmax.cc index c1a56383d..3a5faf5f6 100644 --- a/src/core/expr/fexpr_minmax.cc +++ b/src/core/expr/fexpr_minmax.cc @@ -56,11 +56,9 @@ class FExpr_MinMax : public FExpr_ReduceUnary { case SType::INT16: return make(std::move(col), gby, is_grouped); case SType::INT32: - return make(std::move(col), gby, is_grouped); case SType::DATE32: return make(std::move(col), gby, is_grouped); case SType::INT64: - return make(std::move(col), gby, is_grouped); case SType::TIME64: return make(std::move(col), gby, is_grouped); case SType::FLOAT32: From 2ce4880e4294f49a12b3688e764f2d99ff83f5b4 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 14:35:02 -0700 Subject: [PATCH 122/124] Remove unnecessary newline --- src/core/column/mean.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/column/mean.h b/src/core/column/mean.h index b5010696c..7ce4d4029 100644 --- a/src/core/column/mean.h +++ b/src/core/column/mean.h @@ -54,4 +54,3 @@ class Mean_ColumnImpl : public ReduceUnary_ColumnImpl { } // namespace dt #endif - From 74bd5b20e49605b101048afbf6b78f116d40ec7a Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 17:27:27 -0700 Subject: [PATCH 123/124] Add some comment --- src/core/column/reduce_unary.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/column/reduce_unary.h b/src/core/column/reduce_unary.h index ef66120a9..d3d197360 100644 --- a/src/core/column/reduce_unary.h +++ b/src/core/column/reduce_unary.h @@ -33,6 +33,7 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { Groupby gby_; public: + ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby, SType stype_out) : Virtual_ColumnImpl(gby.size(), stype_out), col_(std::move(col)), @@ -42,6 +43,8 @@ class ReduceUnary_ColumnImpl : public Virtual_ColumnImpl { } + // Constructor for the case when `stype_out` is the same + // as the input column stype. ReduceUnary_ColumnImpl(Column &&col, const Groupby& gby) : ReduceUnary_ColumnImpl(std::move(col), gby, col.stype()) {} From 6818c14f0ddcd262a3877e6ca41485f5a23095da Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Fri, 21 Apr 2023 18:18:14 -0700 Subject: [PATCH 124/124] Fixes to docs --- docs/api/dt/count.rst | 4 ++++ docs/api/dt/countna.rst | 4 ++-- src/core/expr/fexpr_count.cc | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/api/dt/count.rst b/docs/api/dt/count.rst index 6b4e20e11..9d87cd5ea 100644 --- a/docs/api/dt/count.rst +++ b/docs/api/dt/count.rst @@ -22,6 +22,10 @@ number of columns as in `cols`. All the resulting column's stypes are `int64`. + except: TypeError + The exception is raised when one of the input columns has + an `obj64` type. + Examples -------- diff --git a/docs/api/dt/countna.rst b/docs/api/dt/countna.rst index 8dccdcf4d..41523bcc1 100644 --- a/docs/api/dt/countna.rst +++ b/docs/api/dt/countna.rst @@ -25,8 +25,8 @@ All the resulting column's stypes are `int64`. except: TypeError - The exception is raised when one of the columns from `cols` - has an obj64 type. + The exception is raised when one of the input columns has + an `obj64` type. Examples diff --git a/src/core/expr/fexpr_count.cc b/src/core/expr/fexpr_count.cc index 5ee19b94c..cf29f559b 100644 --- a/src/core/expr/fexpr_count.cc +++ b/src/core/expr/fexpr_count.cc @@ -65,7 +65,7 @@ class FExpr_CountUnary : public FExpr_ReduceUnary { return make(std::move(col), gby, is_grouped); case SType::STR32: case SType::STR64: - return make(std::move(col), gby, is_grouped); + return make(std::move(col), gby, is_grouped); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr();