Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dirichlet family #60

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
10 changes: 9 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@ Description: Boosting models for fitting generalized additive models for
data.
Depends: R (>= 2.10.0), mboost (>= 2.8-0), stabs (>= 0.5-0), parallel
Imports: graphics, grDevices, stats, utils
Suggests: gamlss, gamlss.dist, survival, BayesX, R2BayesX
Suggests:
gamlss,
gamlss.dist,
survival,
BayesX,
R2BayesX,
testthat (>= 3.0.0),
DirichletReg
LazyLoad: yes
LazyData: yes
License: GPL-2
URL: For source code, development versions and issue tracker see https://github.com/boost-R/gamboostLSS
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export(glmboostLSS,
BetaLSS, BetaMu, BetaPhi,
WeibullLSS, WeibullMu, WeibullSigma,
ZIPoLSS, ZINBLSS,
DirichletAlpha, DirichletLSS,
gamlss.Families, as.families,
gamlss1parMu,
gamlss2parMu, gamlss2parSigma,
Expand Down
68 changes: 68 additions & 0 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -880,3 +880,71 @@ ZINBLSS <- function(mu = NULL, sigma = NULL, nu = NULL,

fam
}

###
# Dirichtlet LSS Family
DirichletAlpha <- function(k = NULL, K = NULL) {
# Generate argument names dynamically
arg <- c("y", paste0("a", 1:K))
arg[arg == paste0("a", k)] <- "f" # Replace the parameter for `k` with `f`

# Loss function
loss <- function(y, f) {
A <- NULL
eval(parse(text = paste0("A <- cbind(",
gsub("f", "exp(f)",
paste0(grep("a|f", arg, value = TRUE), collapse = ",")),
")")))
result <- - (lgamma(rowSums(A)) - rowSums(lgamma(A)) + rowSums((A - 1) * log(y)))
return(result)
}

# Negative gradient function
ngradient <- function(y, f, w = 1) {
A <- NULL
eval(parse(text = paste0("A <- cbind(",
gsub("f", "exp(f)",
paste0(grep("a|f", arg, value = TRUE), collapse = ",")),
")")))
ngr <- A[, k] * (digamma(rowSums(A)) - digamma(A[, k]) + log(y[, k]))
return(ngr)
}

# Risk function
risk <- function(y, f, w = 1) {
sum(w * loss(y, f))
}

# Offset function
offset <- function(y, w) {
RET <- min(y[, k])
return(RET)
}

# Return Family object
Family(
ngradient = ngradient,
risk = risk,
loss = loss,
response = function(f) exp(f),
offset = offset,
name = paste0("Dirichlet Distribution: a", k, " (log link)")
)
}


DirichletLSS <- function(K = NULL) {
# Check if K is specified
if (is.null(K)) {
stop("Number of categories 'K' must be specified.")
}

# Construct the family string dynamically
fam <- paste(sapply(1:K, function(k) {
paste0("a", k, " = DirichletAlpha(k = ", k, ", K = K)")
}), collapse = ",")

# Dynamically evaluate and return the Families object
eval(parse(text = paste0("Families(", fam, ")")))
}

20 changes: 19 additions & 1 deletion man/families.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
\alias{ZIPoLSS}
\alias{ZINBLSS}

\alias{DirichletAlpha}
\alias{DirichletLSS}

\alias{options}
\alias{stab_ngrad}
\alias{stabilize_ngrad}
Expand Down Expand Up @@ -112,6 +115,10 @@ LogLogLSS(mu = NULL, sigma = NULL,
# Weibull distribution
WeibullLSS(mu = NULL, sigma = NULL,
stabilization = c("none", "MAD", "L2"))

############################################################
# Family for Dirichlet regression models
DirichletLSS(K = NULL)

############################################################
# Constructor function for new GAMLSS distributions
Expand All @@ -133,6 +140,7 @@ Families(..., qfun = NULL, name = NULL)
standardized in each boosting step. It can be either "none",
"MAD" or "L2". See also Details below.
}
\item{K}{An integer specifying the number of categories in the Dirichlet distribution. This must be provided.}
}
\details{

Expand All @@ -159,6 +167,14 @@ Families(..., qfun = NULL, name = NULL)
location and scale parameters (with log link) of the negative binomial
component of the model. The zero-generating process (with logit link)
is represented by \code{nu}.

The \code{DirichletLSS} family can be used to fit Dirichlet regression models.
Here, \code{DirichletAlpha} corresponds to the distributional parameters in
the Dirichlet distribution which are dependent on the number of categories
in the respective compositional data (that is, proportions, amounts or rates.)
The number of categories, thereby distributional parameters in the data set
(K) has to be specified manually in the beginning of the fitting process
of the model.

The \code{Families} function can be used to implements a new GAMLSS
distribution which can be used for fitting by \code{\link{mboostLSS}}.
Expand Down Expand Up @@ -192,8 +208,10 @@ Families(..., qfun = NULL, name = NULL)
}

\author{
\code{BetaLSS} for boosting beta regression was implmented by Florian
\code{BetaLSS} for boosting beta regression was implemented by Florian
Wickler.
\code{DirichletLSS} for boosting Dirichlet regression models was implemented by
Michael Balzer.
}

\references{
Expand Down
68 changes: 68 additions & 0 deletions tests/regtest-families.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,74 @@ stopifnot(sum(abs(coef(model, off2int = TRUE, which ="")[[1]] - c(3, 1, 2, 0)))
< sum(abs(coef(m1) - c(3, 1, 2, 0))))
stopifnot(sum(abs(coef(model, off2int = TRUE)[[2]] - c(0.2, 0, 0, 0))) < 0.4)

# Check Dirichlet family
n <- 150
p <- 10

x <- matrix(runif(p * n, 0,1), n)

x <- data.frame(x)

a1 <- exp(2.5*x[,1] - x[,2] + 3*x[,3])
a2 <- exp(2*x[,4] + 2*x[,5] - x[,6])
a3 <- exp(1.5*x[,7] - 1.5*x[,8] + x[,9])
A <- cbind(a1,a2,a3)

y <- DirichletReg::rdirichlet(nrow(A),A)

colnames(y) <- c("y1","y2","y3")


# Check cyclical
model <- glmboostLSS(y ~ ., data = x,
families = DirichletLSS(K=3),
control = boost_control(trace = TRUE, mstop = 1000, nu = 0.1))
model2 <- glmboostLSS(y ~ X1 + X2 + X3, data = x,
families = DirichletLSS(K=3),
control = boost_control(trace = TRUE, mstop = 1000, nu = 0.1))
# Check noncyclical
model3 <- glmboostLSS(y ~ ., data = x,
families = DirichletLSS(K=3),
control = boost_control(trace = TRUE, mstop = 1000, nu = 0.1), method = "noncyclic")
model4 <- glmboostLSS(y ~ X1 + X2 + X3, data = x,
families = DirichletLSS(K=3),
control = boost_control(trace = TRUE, mstop = 1000, nu = 0.1), method = "noncyclic")

model[300]
model2[300]
model3[300]
model4[300]
coef(model, off2int = TRUE)

# Check gamboostLSS for Dirichlet family
x.train <- matrix(rnorm(p * n, 0,1), n)
x.train <- data.frame(x.train)

a1.train <- exp(x.train[,1]**2)
a2.train <- exp(2*tanh(3*x.train[,2]))
a3.train <- exp(cos(x.train[,3]))

A <- cbind(a1.train,a2.train,a3.train)

y.train <- DirichletReg::rdirichlet(nrow(A),A)

x <- paste(c(paste("bbs(X", 1:p, ")", sep = "")), collapse = "+")
form <- as.formula((paste("y.train ~", x)))

model5 <- gamboostLSS(form, data = x.train,
families = DirichletLSS(K = 3),
control = boost_control(trace = TRUE, mstop = 500, nu = 0.1), method = 'noncyclic')
model5[300]
coef(model5[200])

# Check stabsel for Dirichlet family
modstabs <- stabsel(model, cutoff = 0.9, PFER = 5)
modstabs

# Check for correct error message for Dirichlet family
try(glmboostLSS(y ~ ., data = x,
families = DirichletLSS(),
control = boost_control(trace = TRUE, mstop = 1000, nu = 0.1)))

### Check that "families"-object contains a response function
NBinomialMu2 <- function(...){
Expand Down