Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix simulate! when only the estimable coefficients are provided #756

Merged
merged 18 commits into from
Mar 25, 2024
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
MixedModels v4.23.1 Release Notes
==============================
* Fix for `simulate!` when only the estimable coefficients for a rank-deficient model are provided. [#756]
* Improve handling of rank deficiency in GLMM. [#756]
* Fix display of GLMM bootstrap without a dispersion parameter. [#756]

MixedModels v4.23.0 Release Notes
==============================
* Support for rank deficiency in the parametric bootstrap. [#755]
Expand Down Expand Up @@ -509,3 +515,4 @@ Package dependencies
[#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744
[#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748
[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755
[#756]: https://github.com/JuliaStats/MixedModels.jl/issues/756
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
author = ["Phillip Alday <[email protected]>", "Douglas Bates <[email protected]>", "Jose Bayoan Santiago Calderon <[email protected]>"]
version = "4.23.0"
version = "4.23.1"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
11 changes: 11 additions & 0 deletions src/Xymat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,22 @@ Base.copyto!(A::FeTerm{T}, src::AbstractVecOrMat{T}) where {T} = copyto!(A.x, sr

Base.eltype(::FeTerm{T}) where {T} = T

"""
pivot(m::MixedModel)
pivot(A::FeTerm)

Return the pivot associated with the FeTerm.
"""
@inline pivot(m::MixedModel) = pivot(m.feterm)
@inline pivot(A::FeTerm) = A.piv

function fullrankx(A::FeTerm)
x, rnk = A.x, A.rank
return rnk == size(x, 2) ? x : view(x, :, 1:rnk) # this handles the zero-columns case
end

fullrankx(m::MixedModel) = fullrankx(m.feterm)

LinearAlgebra.rank(A::FeTerm) = A.rank

"""
Expand Down
14 changes: 8 additions & 6 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -626,11 +626,12 @@ function σρ!(v::AbstractVector, d::Diagonal, σ)
return append!(v, σ .* d.diag)
end

function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T}
"""
σρ!(v, t, σ)
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
"""
σρ!(v, t, σ)

push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
function σρ!(v::AbstractVector{<:Union{T,Missing}}, t::LowerTriangular, σ) where {T}
dat = t.data
for i in axes(dat, 1)
ssqr = zero(T)
Expand Down Expand Up @@ -659,11 +660,12 @@ end

function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T}
(; fits, λ) = bsamp
Tfull = ismissing(first(bsamp.fits).σ) ? Union{T,Missing} : T
λcp = copy.(λ)
syms = _syms(bsamp)
m = length(syms)
n = length(fits)
v = sizehint!(T[], m * n)
v = sizehint!(Tfull[], m * n)
for f in fits
(; β, θ, σ) = f
push!(v, f.objective)
Expand Down
9 changes: 5 additions & 4 deletions src/generalizedlinearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
end

function StatsAPI.coef(m::GeneralizedLinearMixedModel{T}) where {T}
piv = m.LMM.feterm.piv
piv = pivot(m)
return invpermute!(copyto!(fill(T(-0.0), length(piv)), m.β), piv)
end

Expand Down Expand Up @@ -426,6 +426,7 @@
LMM.optsum,
)
end
X = fullrankx(LMM.feterm)
# if the response is constant, there's no point (and this may even fail)
# we allow this instead of simply failing so that a constant response can
# be used as the starting point to simulation where the response will be
Expand All @@ -439,8 +440,8 @@
# XXX unfortunately, this means we have double-rank deficiency detection
# TODO: construct GLM by hand so that we skip collinearity checks
# TODO: extend this so that we never fit a GLM when initializing from LMM
dofit = size(LMM.X, 2) != 0 # GLM.jl kwarg
gl = glm(LMM.X, y, d, l;
dofit = size(X, 2) != 0 # GLM.jl kwarg
gl = glm(X, y, d, l;
wts=convert(Vector{T}, wts),
dofit,
offset=convert(Vector{T}, offset))
Expand Down Expand Up @@ -718,7 +719,7 @@
io::IO, ::MIME"text/plain", m::GeneralizedLinearMixedModel{T,D}
) where {T,D}
if m.optsum.feval < 0
@warn("Model has not been fit")

Check warning on line 722 in src/generalizedlinearmixedmodel.jl

View workflow job for this annotation

GitHub Actions / Documentation

Model has not been fit
return nothing
end
nAGQ = m.LMM.optsum.nAGQ
Expand Down Expand Up @@ -793,7 +794,7 @@
b = m.b
u = m.u
reterms = m.LMM.reterms
mul!(η, modelmatrix(m), m.β)
mul!(η, fullrankx(m), m.β)
for i in eachindex(b)
mul!(η, reterms[i], vec(mul!(b[i], reterms[i].λ, u[i])), one(T), one(T))
end
Expand Down
10 changes: 6 additions & 4 deletions src/linearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ function StatsAPI.fit(
end

function StatsAPI.coef(m::LinearMixedModel{T}) where {T}
return coef!(Vector{T}(undef, length(m.feterm.piv)), m)
return coef!(Vector{T}(undef, length(pivot(m))), m)
end

function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T}
piv = m.feterm.piv
piv = pivot(m)
return invpermute!(fixef!(v, m), piv)
end

Expand Down Expand Up @@ -904,6 +904,8 @@ Overwrite `v` with the conditional modes of the random effects for `m`.

If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise
on the original scale

`β` is the truncated, pivoted coefficient vector.
"""
function ranef!(
v::Vector, m::LinearMixedModel{T}, β::AbstractArray{T}, uscale::Bool
Expand Down Expand Up @@ -1241,12 +1243,12 @@ function stderror!(v::AbstractVector{Tv}, m::LinearMixedModel{T}) where {Tv,T}
scr[i] = true
v[i] = s * norm(ldiv!(L, scr))
end
invpermute!(v, m.feterm.piv)
invpermute!(v, pivot(m))
return v
end

function StatsAPI.stderror(m::LinearMixedModel{T}) where {T}
return stderror!(similar(m.feterm.piv, T), m)
return stderror!(similar(pivot(m), T), m)
end

"""
Expand Down
6 changes: 3 additions & 3 deletions src/predict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Similarly, offsets are also not supported for `GeneralizedLinearMixedModel`.
function StatsAPI.predict(
m::LinearMixedModel, newdata::Tables.ColumnTable; new_re_levels=:missing
)
return _predict(m, newdata, coef(m)[m.feterm.piv]; new_re_levels)
return _predict(m, newdata, coef(m)[pivot(m)]; new_re_levels)
end

function StatsAPI.predict(
Expand All @@ -81,7 +81,7 @@ function StatsAPI.predict(
)
type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)"))
# want pivoted but not truncated
y = _predict(m.LMM, newdata, coef(m)[m.feterm.piv]; new_re_levels)
y = _predict(m.LMM, newdata, coef(m)[pivot(m)]; new_re_levels)

return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(m)), y, y)
end
Expand Down Expand Up @@ -126,7 +126,7 @@ function _predict(m::MixedModel{T}, newdata, β; new_re_levels) where {T}
ytemp, lmm
end

pivotmatch = mnew.feterm.piv[m.feterm.piv]
pivotmatch = pivot(mnew)[pivot(m)]
grps = fnames(m)
mul!(y, view(mnew.X, :, pivotmatch), β)
# mnew.reterms for the correct Z matrices
Expand Down
18 changes: 9 additions & 9 deletions src/simulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,17 +149,18 @@ end
function simulate!(
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ
) where {T}
length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank ||
length(β) == length(pivot(m)) || length(β) == m.feterm.rank ||
throw(ArgumentError("You must specify all (non-singular) βs"))

β = convert(Vector{T}, β)
σ = T(σ)
θ = convert(Vector{T}, θ)
isempty(θ) || setθ!(m, θ)

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
if length(β) ≠ length(pivot(m))
padding = length(pivot(m)) - rank(m)
append!(β, fill(-0.0, padding))
invpermute!(β, pivot(m))
end

# initialize y to standard normal
Expand Down Expand Up @@ -228,7 +229,7 @@ function _simulate!(
θ,
resp=nothing,
) where {T}
length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank ||
length(β) == length(pivot(m)) || length(β) == m.feterm.rank ||
throw(ArgumentError("You must specify all (non-singular) βs"))

dispersion_parameter(m) ||
Expand All @@ -247,11 +248,10 @@ function _simulate!(

d = m.resp.d

if length(β) length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
append!(β, fill(-0.0, padding))
if length(β) == length(pivot(m))
# unlike LMM, GLMM stores the truncated, pivoted vector directly
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tangential to this PR but it could be convenient to have some kind of pivot accessor to retrieve this array.

β = view(β, view(pivot(m), 1:(rank(m))))
end

fast = (length(m.θ) == length(m.optsum.final))
setpar! = fast ? setθ! : setβθ!
params = fast ? θ : vcat(β, θ)
Expand All @@ -271,7 +271,7 @@ function _simulate!(
# add fixed-effects contribution
# note that unit scaling may not be correct for
# families with a dispersion parameter
mul!(η, lm.X, β, one(T), one(T))
mul!(η, fullrankx(lm), β, one(T), one(T))

μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu

Expand Down
27 changes: 17 additions & 10 deletions test/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ using Random
using Statistics
using StableRNGs
using Statistics
using Suppressor
using Tables
using Test

Expand Down Expand Up @@ -224,16 +225,22 @@ end
@testset "Rank deficient" begin
rng = MersenneTwister(0);
x = rand(rng, 100);
data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5))
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5))
@testset "$family" for family in [Normal(), Bernoulli()]
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
end

yc = simulate(StableRNG(1), model; β=coef(model))
yf = simulate(StableRNG(1), model; β=fixef(model))
@test all(x -> isapprox(x...), zip(yc, yf))
end
end
end
Expand Down
Loading