diff --git a/authors.html b/authors.html index cfc5684..eb2d405 100644 --- a/authors.html +++ b/authors.html @@ -75,14 +75,14 @@

Citation

Mayer M (2024). hstats: Interaction Statistics. -R package version 1.2.0, https://mayer79.github.io/hstats, https://github.com/ModelOriented/hstats. +R package version 1.2.0, https://modeloriented.github.io/hstats/, https://github.com/ModelOriented/hstats/.

@Manual{,
   title = {hstats: Interaction Statistics},
   author = {Michael Mayer},
   year = {2024},
-  note = {R package version 1.2.0, https://mayer79.github.io/hstats},
-  url = {https://github.com/ModelOriented/hstats},
+  note = {R package version 1.2.0, https://modeloriented.github.io/hstats/},
+  url = {https://github.com/ModelOriented/hstats/},
 }
diff --git a/index.html b/index.html index 984a64a..b775509 100644 --- a/index.html +++ b/index.html @@ -166,7 +166,7 @@

Usage

Fit model

-library(hstats)
+library(hstats)
 library(ggplot2)
 library(xgboost)
 library(shapviz)
@@ -251,7 +251,8 @@ 

Describe interaction
-plot(partial_dep(fit, v = "age", X = X_train, BY = "log_ocean"), show_points = FALSE)

+partial_dep(fit, v = "age", X = X_train, BY = "log_ocean") |> + plot(show_points = FALSE)

 pd <- partial_dep(fit, v = c("age", "log_ocean"), X = X_train, grid_size = 1000)
@@ -259,8 +260,8 @@ 

Describe interaction

plot(pd, d2_geom = "line", show_points = FALSE)

-ic <- ice(fit, v = "age", X = X_train, BY = "log_ocean")
-plot(ic, center = TRUE)
+ice(fit, v = "age", X = X_train, BY = "log_ocean") |> + plot(center = TRUE)

@@ -268,11 +269,14 @@

Variable importance
-plot(pd_importance(s))
+pd_importance(s) |> 
+  plot()
 
 # Compared with four times repeated permutation importance regarding MSE
 set.seed(10)
-plot(perm_importance(fit, X = X_valid, y = y_valid))

+ +perm_importance(fit, X = X_valid, y = y_valid) |> + plot()

Permutation importance returns the same order in this case:

@@ -283,7 +287,7 @@

DALEX

The main functions work smoothly on DALEX explainers:

-library(hstats)
+library(hstats)
 library(DALEX)
 library(ranger)
 
@@ -296,14 +300,17 @@ 

DALEX s # 0.054 plot(s) -# Strongest relative interaction -plot(ice(ex, v = "Sepal.Width", BY = "Petal.Width"), center = TRUE) -plot(partial_dep(ex, v = "Sepal.Width", BY = "Petal.Width"), show_points = FALSE) -plot(partial_dep(ex, v = c("Sepal.Width", "Petal.Width"), grid_size = 200)) +# Strongest relative interaction (different visualizations) +ice(ex, v = "Sepal.Width", BY = "Petal.Width") |> + plot(center = TRUE) + +partial_dep(ex, v = "Sepal.Width", BY = "Petal.Width") |> + plot(show_points = FALSE) -perm_importance(ex) +partial_dep(ex, v = c("Sepal.Width", "Petal.Width"), grid_size = 200) |> + plot() -# Permutation importance +perm_importance(ex) # Petal.Length Petal.Width Sepal.Width Species # 0.59836442 0.11625137 0.07966910 0.03982554

@@ -323,7 +330,7 @@

Multivariate responses

Common preparation

-library(hstats)
+library(hstats)
 
 ix <- c(1:40, 51:90, 101:140)
 train <- iris[ix, ]
@@ -380,7 +387,7 @@ 

LightGBMaverage_loss(fit, X = X_valid, y = y_valid, loss = "mlogloss") perm_importance(fit, X = X_valid, y = y_valid, loss = "mlogloss", m_rep = 100) -# Permutation importance regarding mlogloss + # Petal.Length Petal.Width Sepal.Width Sepal.Length # 2.624241332 1.011168660 0.082477177 0.009757393 @@ -456,7 +463,7 @@

Meta-learning packagestidymodels

-library(hstats)
+library(hstats)
 library(tidymodels)
 
 set.seed(1)
@@ -476,7 +483,9 @@ 

tidymodelss <- hstats(fit, X = iris[, -1]) s # 0 -> no interactions -plot(partial_dep(fit, v = "Petal.Width", X = iris)) + +partial_dep(fit, v = "Petal.Width", X = iris) |> + plot() imp <- perm_importance(fit, X = iris, y = "Sepal.Length") imp @@ -490,7 +499,7 @@

tidymodelscaret

-library(hstats)
+library(hstats)
 library(caret)
 
 set.seed(1)
@@ -505,14 +514,17 @@ 

caret h2(hstats(fit, X = iris[, -1])) # 0 -plot(ice(fit, v = "Petal.Width", X = iris), center = TRUE) -plot(perm_importance(fit, X = iris, y = "Sepal.Length"))

+ice(fit, v = "Petal.Width", X = iris) |> + plot(center = TRUE) + +perm_importance(fit, X = iris, y = "Sepal.Length") |> + plot()

mlr3

diff --git a/news/index.html b/news/index.html index d08da37..477608b 100644 --- a/news/index.html +++ b/news/index.html @@ -58,19 +58,15 @@

Changelog

-

New home

+

My new home

-

Major changes

+

Other changes

  • Factor-valued predictions are no longer possible.
  • Consequently, also removed “classification_error” loss.
-
-

Minor changes

-
  • Code simplifications.
  • -
@@ -88,30 +84,30 @@

API

hstats 1.1.1 (2023-12-06)

Performance improvements

-
  • For pure data.frames (no tibbles, data.tables etc.), most functions are significantly faster (#110).
  • -
  • Slight speed-up of permutation importance for non-matrix X (#109).
  • +
    • For pure data.frames (no tibbles, data.tables etc.), most functions are significantly faster (#110).
    • +
    • Slight speed-up of permutation importance for non-matrix X (#109).

Other changes

-
  • In multivariate cases, it was possible that normalized H-statistics could equal 0/0 (= NaN). Such values are now replaced by 0 (#107).
  • -
  • Removed an unnecessary special case when calculating column means (#106).
  • +
    • In multivariate cases, it was possible that normalized H-statistics could equal 0/0 (= NaN). Such values are now replaced by 0 (#107).
    • +
    • Removed an unnecessary special case when calculating column means (#106).

Enhancements

-
  • {hstats} now also works for factor predictions. The levels are represented by one-hot-encoded columns (PR#101).
  • -
  • The plot method of a two-dimensional PDP has recieved the option d2_geom = "line". Instead of a heatmap of the two features, one of the features is moved to color grouping. Combined with swap_dim = TRUE, you can swap the role of the two v variables without recalculating anything. The idea was proposed by Roel Verbelen in issue #91, see also issue #94.
  • +
    • {hstats} now also works for factor predictions. The levels are represented by one-hot-encoded columns (PR#101).
    • +
    • The plot method of a two-dimensional PDP has received the option d2_geom = "line". Instead of a heatmap of the two features, one of the features is moved to color grouping. Combined with swap_dim = TRUE, you can swap the role of the two v variables without recalculating anything. The idea was proposed by Roel Verbelen in issue #91, see also issue #94.

Bug fixes

-
  • Using BY and w via column names would fail for tibbles. This problem was described in #92 by Roel Verbelen. Thx!
  • +
    • Using BY and w via column names would fail for tibbles. This problem was described in #92 by Roel Verbelen. Thx!

Other changes

-
  • Much faster one-hot-encoding, thanks to Mathias Ambühl (PR#101).
  • -
  • Most functions are slightly faster (PR#101).
  • +
    • Much faster one-hot-encoding, thanks to Mathias Ambühl (PR#101).
    • +
    • Most functions are slightly faster (PR#101).
    • Add unit tests to compare against {iml}.
    • Made all examples “tibble” and “data.table” friendly.
    • Revised input checks in loss functions (relevant for perm_importance() and average_loss()).
    • diff --git a/pkgdown.yml b/pkgdown.yml index a302a5e..8c763c2 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -2,4 +2,4 @@ pandoc: 3.1.11 pkgdown: 2.1.0 pkgdown_sha: ~ articles: {} -last_built: 2024-07-11T12:36Z +last_built: 2024-07-12T12:34Z