Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix simulate! when only the estimable coefficients are provided #756

Merged
merged 18 commits into from
Mar 25, 2024
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
MixedModels v4.23.1 Release Notes
==============================
* Fix for `simulate!` when only the estimable coefficients for a rank-deficient model are provided. [#756]
* Improve handling of rank deficiency in GLMM. [#756]
* Fix display of GLMM bootstrap without a dispersion parameter.

MixedModels v4.23.0 Release Notes
==============================
* Support for rank deficiency in the parametric bootstrap. [#755]
Expand Down Expand Up @@ -509,3 +515,4 @@ Package dependencies
[#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744
[#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748
[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755
[#756]: https://github.com/JuliaStats/MixedModels.jl/issues/756
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
author = ["Phillip Alday <[email protected]>", "Douglas Bates <[email protected]>", "Jose Bayoan Santiago Calderon <[email protected]>"]
version = "4.23.0"
version = "4.23.1"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
14 changes: 8 additions & 6 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -626,11 +626,12 @@ function σρ!(v::AbstractVector, d::Diagonal, σ)
return append!(v, σ .* d.diag)
end

function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T}
"""
σρ!(v, t, σ)
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
"""
σρ!(v, t, σ)

push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
function σρ!(v::AbstractVector{<:Union{T,Missing}}, t::LowerTriangular, σ) where {T}
dat = t.data
for i in axes(dat, 1)
ssqr = zero(T)
Expand Down Expand Up @@ -659,11 +660,12 @@ end

function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T}
(; fits, λ) = bsamp
Tfull = ismissing(first(bsamp.fits).σ) ? Union{T,Missing} : T
λcp = copy.(λ)
syms = _syms(bsamp)
m = length(syms)
n = length(fits)
v = sizehint!(T[], m * n)
v = sizehint!(Tfull[], m * n)
for f in fits
(; β, θ, σ) = f
push!(v, f.objective)
Expand Down
13 changes: 7 additions & 6 deletions src/generalizedlinearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ function GeneralizedLinearMixedModel(
LMM.optsum,
)
end
X = view(LMM.X, :, 1:LMM.feterm.rank)
palday marked this conversation as resolved.
Show resolved Hide resolved
# if the response is constant, there's no point (and this may even fail)
# we allow this instead of simply failing so that a constant response can
# be used as the starting point to simulation where the response will be
Expand All @@ -439,11 +440,11 @@ function GeneralizedLinearMixedModel(
# XXX unfortunately, this means we have double-rank deficiency detection
# TODO: construct GLM by hand so that we skip collinearity checks
# TODO: extend this so that we never fit a GLM when initializing from LMM
dofit = size(LMM.X, 2) != 0 # GLM.jl kwarg
gl = glm(LMM.X, y, d, l;
wts=convert(Vector{T}, wts),
dofit,
offset=convert(Vector{T}, offset))
dofit = size(X, 2) != 0 # GLM.jl kwarg
gl = glm(X, y, d, l;
wts=convert(Vector{T}, wts),
dofit,
offset=convert(Vector{T}, offset))
palday marked this conversation as resolved.
Show resolved Hide resolved
β = dofit ? coef(gl) : T[]
u = [fill(zero(eltype(y)), vsize(t), nlevs(t)) for t in LMM.reterms]
# vv is a template vector used to initialize fields for AGQ
Expand Down Expand Up @@ -793,7 +794,7 @@ function updateη!(m::GeneralizedLinearMixedModel{T}) where {T}
b = m.b
u = m.u
reterms = m.LMM.reterms
mul!(η, modelmatrix(m), m.β)
mul!(η, view(modelmatrix(m), :, 1:m.LMM.feterm.rank), m.β)
palday marked this conversation as resolved.
Show resolved Hide resolved
for i in eachindex(b)
mul!(η, reterms[i], vec(mul!(b[i], reterms[i].λ, u[i])), one(T), one(T))
end
Expand Down
2 changes: 2 additions & 0 deletions src/linearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,8 @@ Overwrite `v` with the conditional modes of the random effects for `m`.

If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise
on the original scale

`β` is the truncated, pivoted coefficient vector.
"""
function ranef!(
v::Vector, m::LinearMixedModel{T}, β::AbstractArray{T}, uscale::Bool
Expand Down
13 changes: 6 additions & 7 deletions src/simulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ function simulate!(
isempty(θ) || setθ!(m, θ)

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
append!(β, fill(-0.0, padding))
β = invpermute!(copyto!(fill(-0.0, length(m.feterm.piv)), β),
palday marked this conversation as resolved.
Show resolved Hide resolved
m.feterm.piv)
end

# initialize y to standard normal
Expand Down Expand Up @@ -247,11 +247,10 @@ function _simulate!(

d = m.resp.d

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
append!(β, fill(-0.0, padding))
if length(β) == length(m.feterm.piv)
# unlike LMM, GLMM stores the truncated, pivoted vector directly
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tangential to this PR but it could be convenient to have some kind of pivot accessor to retrieve this array.

β = β[view(m.feterm.piv, 1:m.feterm.rank)]
palday marked this conversation as resolved.
Show resolved Hide resolved
end

fast = (length(m.θ) == length(m.optsum.final))
setpar! = fast ? setθ! : setβθ!
params = fast ? θ : vcat(β, θ)
Expand All @@ -271,7 +270,7 @@ function _simulate!(
# add fixed-effects contribution
# note that unit scaling may not be correct for
# families with a dispersion parameter
mul!(η, lm.X, β, one(T), one(T))
mul!(η, view(lm.X, :, 1:lm.feterm.rank), β, one(T), one(T))
palday marked this conversation as resolved.
Show resolved Hide resolved

μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu

Expand Down
27 changes: 17 additions & 10 deletions test/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ using Random
using Statistics
using StableRNGs
using Statistics
using Suppressor
using Tables
using Test

Expand Down Expand Up @@ -224,16 +225,22 @@ end
@testset "Rank deficient" begin
rng = MersenneTwister(0);
x = rand(rng, 100);
data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5))
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5))
@testset "$family" for family in [Normal(), Bernoulli()]
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
palday marked this conversation as resolved.
Show resolved Hide resolved
end

yc = simulate(StableRNG(1), model; β=coef(model))
yf = simulate(StableRNG(1), model; β=fixef(model))
@test all(x -> isapprox(x...), zip(yc, yf))
end
end
end
Expand Down
Loading