From 726966a818e64c6ace769e1cfac6eba3baa551aa Mon Sep 17 00:00:00 2001 From: Aaron Lun Date: Fri, 28 Jun 2024 08:54:16 -0700 Subject: [PATCH] Test for correct calculations when matrix type is not double/int. (#7) This requires some bugfixes to remove the default typing assumptions. --- include/tatami_stats/grouped_sums.hpp | 6 ++-- include/tatami_stats/grouped_variances.hpp | 2 +- include/tatami_stats/ranges.hpp | 4 +-- include/tatami_stats/sums.hpp | 4 +-- include/tatami_stats/variances.hpp | 9 +++--- tests/src/grouped_medians.cpp | 36 ++++++++++++++++++++++ tests/src/grouped_sums.cpp | 36 ++++++++++++++++++++++ tests/src/grouped_variances.cpp | 36 ++++++++++++++++++++++ tests/src/medians.cpp | 27 ++++++++++++++++ tests/src/ranges.cpp | 27 ++++++++++++++++ tests/src/sums.cpp | 27 ++++++++++++++++ tests/src/variances.cpp | 27 ++++++++++++++++ 12 files changed, 229 insertions(+), 12 deletions(-) diff --git a/include/tatami_stats/grouped_sums.hpp b/include/tatami_stats/grouped_sums.hpp index fd495fd..79a470e 100644 --- a/include/tatami_stats/grouped_sums.hpp +++ b/include/tatami_stats/grouped_sums.hpp @@ -117,7 +117,7 @@ void apply(bool row, const tatami::Matrix* p, const Group_* grou runners.emplace_back(local_output.back().data(), sopt.skip_nan, start); } - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, start, len, opt); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, start, len, opt); std::vector xbuffer(len); std::vector ibuffer(len); @@ -178,8 +178,8 @@ void apply(bool row, const tatami::Matrix* p, const Group_* grou runners.emplace_back(len, local_output.back().data(), sopt.skip_nan); } - std::vector xbuffer(len); - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, start, len); + std::vector xbuffer(len); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, start, len); for (int i = 0; i < otherdim; ++i) { auto ptr = ext->fetch(xbuffer.data()); diff --git a/include/tatami_stats/grouped_variances.hpp b/include/tatami_stats/grouped_variances.hpp index 455b170..df23091 100644 --- a/include/tatami_stats/grouped_variances.hpp +++ b/include/tatami_stats/grouped_variances.hpp @@ -396,7 +396,7 @@ void apply(bool row, const tatami::Matrix* p, const Group_* grou runners.emplace_back(len, local_mean_output.back().data(), local_var_output.back().data(), sopt.skip_nan); } - std::vector xbuffer(len); + std::vector xbuffer(len); auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, start, len); for (Index_ i = 0; i < otherdim; ++i) { diff --git a/include/tatami_stats/ranges.hpp b/include/tatami_stats/ranges.hpp index c207130..21760ea 100644 --- a/include/tatami_stats/ranges.hpp +++ b/include/tatami_stats/ranges.hpp @@ -368,7 +368,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* min_out, } else { tatami::parallelize([&](size_t thread, Index_ s, Index_ l) { - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, s, l, opt); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, s, l, opt); std::vector vbuffer(l); std::vector ibuffer(l); @@ -416,7 +416,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* min_out, } else { tatami::parallelize([&](size_t thread, Index_ s, Index_ l) { - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, s, l); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, s, l); std::vector buffer(l); auto local_min = (store_min ? LocalOutputBuffer(thread, s, l, min_out) : LocalOutputBuffer()); diff --git a/include/tatami_stats/sums.hpp b/include/tatami_stats/sums.hpp index fa3947f..185c7b1 100644 --- a/include/tatami_stats/sums.hpp +++ b/include/tatami_stats/sums.hpp @@ -227,7 +227,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* output, c opt.sparse_ordered_index = false; tatami::parallelize([&](size_t thread, Index_ s, Index_ l) { - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, s, l, opt); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, s, l, opt); std::vector vbuffer(l); std::vector ibuffer(l); @@ -256,7 +256,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* output, c } else { tatami::parallelize([&](size_t thread, Index_ s, Index_ l) { - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, s, l); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, s, l); std::vector buffer(l); LocalOutputBuffer local_output(thread, s, l, output); diff --git a/include/tatami_stats/variances.hpp b/include/tatami_stats/variances.hpp index e97bb2e..6966fde 100644 --- a/include/tatami_stats/variances.hpp +++ b/include/tatami_stats/variances.hpp @@ -63,7 +63,8 @@ void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_ template struct MockVector { MockVector(size_t) {} - Index_ operator[](size_t) const { return 0; } + Index_& operator[](size_t) { return out; } + Index_ out = 0; }; } @@ -334,7 +335,7 @@ class RunningSparse { for (Index_ i = 0; i < my_num; ++i) { auto& curM = my_mean[i]; auto& curV = my_variance[i]; - auto ct = my_count - my_nan[i]; + Index_ ct = my_count - my_nan[i]; if (ct < 2) { curV = std::numeric_limits::quiet_NaN(); @@ -412,7 +413,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* output, c } else { tatami::parallelize([&](size_t thread, Index_ s, Index_ l) { - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, s, l); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, s, l); std::vector vbuffer(l); std::vector ibuffer(l); @@ -443,7 +444,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* output, c } else { tatami::parallelize([&](size_t thread, Index_ s, Index_ l) { - auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, s, l); + auto ext = tatami::consecutive_extractor(p, !row, static_cast(0), otherdim, s, l); std::vector buffer(l); std::vector running_means(l); diff --git a/tests/src/grouped_medians.cpp b/tests/src/grouped_medians.cpp index b8aae2f..94da39e 100644 --- a/tests/src/grouped_medians.cpp +++ b/tests/src/grouped_medians.cpp @@ -195,6 +195,42 @@ TEST(GroupedMedians, EdgeCases) { EXPECT_TRUE(tatami_stats::grouped_medians::by_column(&empty1, grouping.data()).empty()); } +TEST(GroupedMedians, NewType) { + size_t NR = 98, NC = 152; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + + std::vector cgrouping; + for (size_t c = 0; c < NC; ++c) { + cgrouping.push_back(c % 5); + } + std::vector rgrouping; + for (size_t r = 0; r < NR; ++r) { + rgrouping.push_back(r % 7); + } + auto rexpected = tatami_stats::grouped_medians::by_row(ref.get(), cgrouping.data()); + auto cexpected = tatami_stats::grouped_medians::by_column(ref.get(), rgrouping.data()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + EXPECT_EQ(tatami_stats::grouped_medians::by_row(dense_row.get(), cgrouping.data()), rexpected); + EXPECT_EQ(tatami_stats::grouped_medians::by_row(dense_column.get(), cgrouping.data()), rexpected); + EXPECT_EQ(tatami_stats::grouped_medians::by_row(sparse_row.get(), cgrouping.data()), rexpected); + EXPECT_EQ(tatami_stats::grouped_medians::by_row(sparse_column.get(), cgrouping.data()), rexpected); + + EXPECT_EQ(tatami_stats::grouped_medians::by_column(dense_row.get(), rgrouping.data()), cexpected); + EXPECT_EQ(tatami_stats::grouped_medians::by_column(dense_column.get(), rgrouping.data()), cexpected); + EXPECT_EQ(tatami_stats::grouped_medians::by_column(sparse_row.get(), rgrouping.data()), cexpected); + EXPECT_EQ(tatami_stats::grouped_medians::by_column(sparse_column.get(), rgrouping.data()), cexpected); +} + TEST(GroupedMedians, DirtyOutputs) { size_t NR = 56, NC = 179; diff --git a/tests/src/grouped_sums.cpp b/tests/src/grouped_sums.cpp index 7123f71..bf5c3f5 100644 --- a/tests/src/grouped_sums.cpp +++ b/tests/src/grouped_sums.cpp @@ -191,6 +191,42 @@ TEST(GroupedSums, EdgeCases) { EXPECT_TRUE(tatami_stats::grouped_sums::by_column(&empty1, grouping.data()).empty()); } +TEST(GroupedSums, NewType) { + size_t NR = 98, NC = 152; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + + std::vector cgrouping; + for (size_t c = 0; c < NC; ++c) { + cgrouping.push_back(c % 5); + } + std::vector rgrouping; + for (size_t r = 0; r < NR; ++r) { + rgrouping.push_back(r % 7); + } + auto rexpected = tatami_stats::grouped_sums::by_row(ref.get(), cgrouping.data()); + auto cexpected = tatami_stats::grouped_sums::by_column(ref.get(), rgrouping.data()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + EXPECT_EQ(tatami_stats::grouped_sums::by_row(dense_row.get(), cgrouping.data()), rexpected); + EXPECT_EQ(tatami_stats::grouped_sums::by_row(dense_column.get(), cgrouping.data()), rexpected); + EXPECT_EQ(tatami_stats::grouped_sums::by_row(sparse_row.get(), cgrouping.data()), rexpected); + EXPECT_EQ(tatami_stats::grouped_sums::by_row(sparse_column.get(), cgrouping.data()), rexpected); + + EXPECT_EQ(tatami_stats::grouped_sums::by_column(dense_row.get(), rgrouping.data()), cexpected); + EXPECT_EQ(tatami_stats::grouped_sums::by_column(dense_column.get(), rgrouping.data()), cexpected); + EXPECT_EQ(tatami_stats::grouped_sums::by_column(sparse_row.get(), rgrouping.data()), cexpected); + EXPECT_EQ(tatami_stats::grouped_sums::by_column(sparse_column.get(), rgrouping.data()), cexpected); +} + TEST(GroupedSums, DirtyOutputs) { size_t NR = 56, NC = 179; diff --git a/tests/src/grouped_variances.cpp b/tests/src/grouped_variances.cpp index 7fd2bc1..ada0310 100644 --- a/tests/src/grouped_variances.cpp +++ b/tests/src/grouped_variances.cpp @@ -213,6 +213,42 @@ TEST(GroupedVariances, EdgeCases) { } } +TEST(GroupedVariances, NewType) { + size_t NR = 198, NC = 52; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + + std::vector cgrouping; + for (size_t c = 0; c < NC; ++c) { + cgrouping.push_back(c % 5); + } + std::vector rgrouping; + for (size_t r = 0; r < NR; ++r) { + rgrouping.push_back(r % 7); + } + auto rexpected = tatami_stats::grouped_variances::by_row(ref.get(), cgrouping.data()); + auto cexpected = tatami_stats::grouped_variances::by_column(ref.get(), rgrouping.data()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_row(dense_row.get(), cgrouping.data()), rexpected); + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_row(dense_column.get(), cgrouping.data()), rexpected); + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_row(sparse_row.get(), cgrouping.data()), rexpected); + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_row(sparse_column.get(), cgrouping.data()), rexpected); + + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_column(dense_row.get(), rgrouping.data()), cexpected); + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_column(dense_column.get(), rgrouping.data()), cexpected); + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_column(sparse_row.get(), rgrouping.data()), cexpected); + compare_double_vectors_of_vectors(tatami_stats::grouped_variances::by_column(sparse_column.get(), rgrouping.data()), cexpected); +} + TEST(GroupedVariances, DirtyOutputs) { int NR = 56, NC = 179; diff --git a/tests/src/medians.cpp b/tests/src/medians.cpp index 1cd5474..ba67558 100644 --- a/tests/src/medians.cpp +++ b/tests/src/medians.cpp @@ -309,6 +309,33 @@ TEST(ComputingDimMedians, RowMediansNaN) { EXPECT_TRUE(std::isnan(rref.back())); } +TEST(ComputingDimMedians, NewType) { + size_t NR = 198, NC = 52; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + auto rexpected = tatami_stats::medians::by_row(ref.get()); + auto cexpected = tatami_stats::medians::by_column(ref.get()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + EXPECT_EQ(tatami_stats::medians::by_row(dense_row.get()), rexpected); + EXPECT_EQ(tatami_stats::medians::by_row(dense_column.get()), rexpected); + EXPECT_EQ(tatami_stats::medians::by_row(sparse_row.get()), rexpected); + EXPECT_EQ(tatami_stats::medians::by_row(sparse_column.get()), rexpected); + + EXPECT_EQ(tatami_stats::medians::by_column(dense_row.get()), cexpected); + EXPECT_EQ(tatami_stats::medians::by_column(dense_column.get()), cexpected); + EXPECT_EQ(tatami_stats::medians::by_column(sparse_row.get()), cexpected); + EXPECT_EQ(tatami_stats::medians::by_column(sparse_column.get()), cexpected); +} + TEST(ComputingDimMedians, DirtyOutput) { size_t NR = 99, NC = 152; auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.5, 1, 10); // see comments above about why we use 0.5. diff --git a/tests/src/ranges.cpp b/tests/src/ranges.cpp index 857c2c1..2de1b68 100644 --- a/tests/src/ranges.cpp +++ b/tests/src/ranges.cpp @@ -287,6 +287,33 @@ TEST(ComputingDimExtremes, NoZeros) { EXPECT_EQ(rref, tatami_stats::ranges::by_row(sparse_column.get())); } +TEST(ComputingDimExtremes, NewType) { + size_t NR = 198, NC = 52; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + auto rexpected = tatami_stats::ranges::by_row(ref.get()); + auto cexpected = tatami_stats::ranges::by_column(ref.get()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + EXPECT_EQ(tatami_stats::ranges::by_row(dense_row.get()), rexpected); + EXPECT_EQ(tatami_stats::ranges::by_row(dense_column.get()), rexpected); + EXPECT_EQ(tatami_stats::ranges::by_row(sparse_row.get()), rexpected); + EXPECT_EQ(tatami_stats::ranges::by_row(sparse_column.get()), rexpected); + + EXPECT_EQ(tatami_stats::ranges::by_column(dense_row.get()), cexpected); + EXPECT_EQ(tatami_stats::ranges::by_column(dense_column.get()), cexpected); + EXPECT_EQ(tatami_stats::ranges::by_column(sparse_row.get()), cexpected); + EXPECT_EQ(tatami_stats::ranges::by_column(sparse_column.get()), cexpected); +} + TEST(ComputingDimExtremes, Empty) { auto dense_row = std::unique_ptr(new tatami::DenseRowMatrix(10, 0, std::vector())); auto cres = tatami_stats::ranges::by_column(dense_row.get()); diff --git a/tests/src/sums.cpp b/tests/src/sums.cpp index e4c822a..b6a1ae9 100644 --- a/tests/src/sums.cpp +++ b/tests/src/sums.cpp @@ -135,6 +135,33 @@ TEST(ComputingDimSums, ColumnSumsWithNan) { EXPECT_TRUE(is_all_nan(tatami_stats::sums::by_column(sparse_column.get()))); } +TEST(ComputingDimSums, NewType) { + size_t NR = 198, NC = 52; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + auto rexpected = tatami_stats::sums::by_row(ref.get()); + auto cexpected = tatami_stats::sums::by_column(ref.get()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + EXPECT_EQ(tatami_stats::sums::by_row(dense_row.get()), rexpected); + EXPECT_EQ(tatami_stats::sums::by_row(dense_column.get()), rexpected); + EXPECT_EQ(tatami_stats::sums::by_row(sparse_row.get()), rexpected); + EXPECT_EQ(tatami_stats::sums::by_row(sparse_column.get()), rexpected); + + EXPECT_EQ(tatami_stats::sums::by_column(dense_row.get()), cexpected); + EXPECT_EQ(tatami_stats::sums::by_column(dense_column.get()), cexpected); + EXPECT_EQ(tatami_stats::sums::by_column(sparse_row.get()), cexpected); + EXPECT_EQ(tatami_stats::sums::by_column(sparse_column.get()), cexpected); +} + TEST(ComputingDimSums, DirtyOutput) { size_t NR = 99, NC = 152; auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1); diff --git a/tests/src/variances.cpp b/tests/src/variances.cpp index f8f9d42..7b6eefe 100644 --- a/tests/src/variances.cpp +++ b/tests/src/variances.cpp @@ -165,6 +165,33 @@ TEST(ComputingDimVariances, ColumnVariancesWithNan) { EXPECT_TRUE(is_all_nan(tatami_stats::variances::by_column(sparse_column.get()))); } +TEST(ComputingDimVariances, NewType) { + size_t NR = 198, NC = 52; + auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1, /* lower = */ 1, /* upper = */ 100); + for (auto& d : dump) { + d = std::round(d); + } + auto ref = std::unique_ptr(new tatami::DenseRowMatrix(NR, NC, dump)); + auto rexpected = tatami_stats::variances::by_row(ref.get()); + auto cexpected = tatami_stats::variances::by_column(ref.get()); + + std::vector ivec(dump.begin(), dump.end()); + auto dense_row = std::make_shared >(NR, NC, std::move(ivec)); + auto dense_column = tatami::convert_to_dense(dense_row.get(), false); + auto sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true); + auto sparse_column = tatami::convert_to_compressed_sparse(dense_row.get(), false); + + EXPECT_EQ(tatami_stats::variances::by_row(dense_row.get()), rexpected); + compare_double_vectors(tatami_stats::variances::by_row(dense_column.get()), rexpected); + compare_double_vectors(tatami_stats::variances::by_row(sparse_row.get()), rexpected); + compare_double_vectors(tatami_stats::variances::by_row(sparse_column.get()), rexpected); + + EXPECT_EQ(tatami_stats::variances::by_column(dense_row.get()), cexpected); + compare_double_vectors(tatami_stats::variances::by_column(dense_column.get()), cexpected); + compare_double_vectors(tatami_stats::variances::by_column(sparse_row.get()), cexpected); + compare_double_vectors(tatami_stats::variances::by_column(sparse_column.get()), cexpected); +} + TEST(ComputingDimVariances, DirtyOutput) { size_t NR = 99, NC = 152; auto dump = tatami_test::simulate_sparse_vector(NR * NC, 0.1);