Skip to content

Commit

Permalink
Merge pull request #108 from mayer79/zero-by-zero
Browse files Browse the repository at this point in the history
Deal with 0 by 0 divisions
  • Loading branch information
mayer79 authored Nov 16, 2023
2 parents 133641a + 0f7cbab commit bfca690
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## Other changes

- Removed an unnecessary special case when calculating column means.
- In multivariate cases, it was possible that normalized H-statistics could equal `0/0 (= NaN)`. Such values are now replaced by 0 ([Issue #107](https://github.com/mayer79/hstats/issues/107)).
- Removed an unnecessary special case when calculating column means ([PR #106](https://github.com/mayer79/hstats/pull/106)).

# hstats 1.1.0

Expand Down
1 change: 1 addition & 0 deletions R/utils_statistics.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ postprocess <- function(num, denom = rep(1, times = NCOL(num)),
} else {
out <- sweep(num, MARGIN = 2L, STATS = denom, FUN = "/")
}
out[is.nan(out)] <- 0 # Deal explicitly with 0/0 cases
} else {
out <- num
}
Expand Down
41 changes: 41 additions & 0 deletions tests/testthat/test_hstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,44 @@ test_that("hstats() works for factor predictions", {
out <- hstats(1, X = cbind(v1 = 0:1, v2 = 0), pred_fun = pf, verbose = FALSE)
expect_equal(out$h2$num, cbind(zero = 0, one = 0))
})

test_that("hstats() gives all zero statistics if prediction is constant", {
pf <- function(m, X) numeric(nrow(X))
out <- hstats(1, X = iris[1:4], pred_fun = pf, verbose = FALSE)

expect_equal(h2(out)$M, matrix(0))
expect_equal(out$h2$num / out$h2$denom, matrix(NaN))

expect_equal(c(h2_overall(out)$M), c(0, 0, 0, 0))
expect_equal(out$mean_f2, 0)
expect_equal(out$h2_overall$denom, out$mean_f2)
expect_equal(out$pd_importance$denom, 0)

expect_equal(c(h2_pairwise(out)$M), rep(0, choose(4, 2)))
expect_equal(c(out$h2_pairwise$denom), rep(1, choose(4, 2)))
})

test_that("behavior is ok if one dim is constant and other not", {
pf <- function(m, X) cbind(0, X$Sepal.Length * X$Petal.Length)
out <- hstats(1, X = iris[1:4], pred_fun = pf, verbose = FALSE)

expect_equal(c(h2(out)$M == 0), c(TRUE, FALSE))
expect_equal(out$h2$num[1L] / out$h2$denom[1L], NaN)

ex <- rbind(
Sepal.Length = c(TRUE, FALSE),
Sepal.Width = c(TRUE, TRUE),
Petal.Length = c(TRUE, FALSE),
Petal.Width = c(TRUE, TRUE)
)
expect_equal(h2_overall(out, sort = FALSE)$M == 0, ex)
expect_equal(out$mean_f2 == 0, c(TRUE, FALSE))
expect_equal(out$h2_overall$denom, out$mean_f2)

rs <- rowSums(h2_pairwise(out, sort = FALSE)$M) > 0
pos <- "Sepal.Length:Petal.Length"

expect_equal(row.names(h2_pairwise(out, sort = FALSE)$M[rs, , drop = FALSE]), pos)
expect_equal(out$h2_pairwise$denom[rs, ] == 0, c(TRUE, FALSE))
expect_true(all(out$h2_pairwise$denom[!rs, ] == 1))
})

0 comments on commit bfca690

Please sign in to comment.